summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-12-12 14:27:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-12 14:27:49 -0800
commit518bacf5a569d111e256d58b9fbc8d7b80ec42ea (patch)
tree53aa3297fbd3cf98caa592dec5b3be4e01646ff4 /arch/x86
parent535b2f73f6f60fb227b700136c134c5d7c8f8ad3 (diff)
parent064e6a8ba61a751625478f656c6f76a6f37a009e (diff)
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar: "The main changes in this cycle were: - do a large round of simplifications after all CPUs do 'eager' FPU context switching in v4.9: remove CR0 twiddling, remove leftover eager/lazy bts, etc (Andy Lutomirski) - more FPU code simplifications: remove struct fpu::counter, clarify nomenclature, remove unnecessary arguments/functions and better structure the code (Rik van Riel)" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/fpu: Remove clts() x86/fpu: Remove stts() x86/fpu: Handle #NM without FPU emulation as an error x86/fpu, lguest: Remove CR0.TS support x86/fpu, kvm: Remove host CR0.TS manipulation x86/fpu: Remove irq_ts_save() and irq_ts_restore() x86/fpu: Stop saving and restoring CR0.TS in fpu__init_check_bugs() x86/fpu: Get rid of two redundant clts() calls x86/fpu: Finish excising 'eagerfpu' x86/fpu: Split old_fpu & new_fpu handling into separate functions x86/fpu: Remove 'cpu' argument from __cpu_invalidate_fpregs_state() x86/fpu: Split old & new FPU code paths x86/fpu: Remove __fpregs_(de)activate() x86/fpu: Rename lazy restore functions to "register state valid" x86/fpu, kvm: Remove KVM vcpu->fpu_counter x86/fpu: Remove struct fpu::counter x86/fpu: Remove use_eager_fpu() x86/fpu: Remove the XFEATURE_MASK_EAGER/LAZY distinction x86/fpu: Hard-disable lazy FPU mode x86/crypto, x86/fpu: Remove X86_FEATURE_EAGER_FPU #ifdef from the crc32c code
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c22
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/fpu/api.h10
-rw-r--r--arch/x86/include/asm/fpu/internal.h139
-rw-r--r--arch/x86/include/asm/fpu/types.h34
-rw-r--r--arch/x86/include/asm/fpu/xstate.h17
-rw-r--r--arch/x86/include/asm/lguest_hcall.h1
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/special_insns.h13
-rw-r--r--arch/x86/include/asm/trace/fpu.h5
-rw-r--r--arch/x86/kernel/fpu/bugs.c7
-rw-r--r--arch/x86/kernel/fpu/core.c74
-rw-r--r--arch/x86/kernel/fpu/init.c107
-rw-r--r--arch/x86/kernel/fpu/signal.c8
-rw-r--r--arch/x86/kernel/fpu/xstate.c9
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c2
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c2
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/traps.c20
-rw-r--r--arch/x86/kvm/cpuid.c4
-rw-r--r--arch/x86/kvm/vmx.c12
-rw-r--r--arch/x86/kvm/x86.c19
-rw-r--r--arch/x86/lguest/boot.c29
-rw-r--r--arch/x86/mm/pkeys.c3
-rw-r--r--arch/x86/xen/enlighten.c13
29 files changed, 100 insertions, 471 deletions
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 0857b1a1de3b..c194d5717ae5 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -48,26 +48,13 @@
#ifdef CONFIG_X86_64
/*
* use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
* for fpu state save/restore overhead.
*/
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
+#define CRC32C_PCL_BREAKEVEN 512
asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#if defined(X86_FEATURE_EAGER_FPU)
-#define set_pcl_breakeven_point() \
-do { \
- if (!use_eager_fpu()) \
- crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
-} while (0)
-#else
-#define set_pcl_breakeven_point() \
- (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
-#endif
#endif /* CONFIG_X86_64 */
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -190,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
@@ -202,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
@@ -261,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
alg.update = crc32c_pcl_intel_update;
alg.finup = crc32c_pcl_intel_finup;
alg.digest = crc32c_pcl_intel_digest;
- set_pcl_breakeven_point();
}
#endif
return crypto_register_shash(&alg);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4dba597c5807..e83f972b0a14 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -104,7 +104,6 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
-#define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index 1429a7c736db..0877ae018fc9 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -27,16 +27,6 @@ extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-extern int irq_ts_save(void);
-extern void irq_ts_restore(int TS_state);
-
-/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
*
* If 'feature_name' is set then put a human-readable description of
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 2737366ea583..d4a684997497 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
/*
* FPU related CPU feature flag helper routines:
*/
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has(X86_FEATURE_EAGER_FPU);
-}
-
static __always_inline __pure bool use_xsaveopt(void)
{
return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -484,42 +479,42 @@ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size)
DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
/*
- * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
- * on this CPU.
+ * The in-register FPU state for an FPU context on a CPU is assumed to be
+ * valid if the fpu->last_cpu matches the CPU, and the fpu_fpregs_owner_ctx
+ * matches the FPU.
*
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
+ * If the FPU register state is valid, the kernel can skip restoring the
+ * FPU state from memory.
+ *
+ * Any code that clobbers the FPU registers or updates the in-memory
+ * FPU state for a task MUST let the rest of the kernel know that the
+ * FPU registers are no longer valid for this task.
+ *
+ * Either one of these invalidation functions is enough. Invalidate
+ * a resource you control: CPU if using the CPU for something else
+ * (with preemption disabled), FPU for the current task, or a task that
+ * is prevented from running by the current task.
*/
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+static inline void __cpu_invalidate_fpregs_state(void)
{
- per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+ __this_cpu_write(fpu_fpregs_owner_ctx, NULL);
}
-static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
-{
- return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
-}
-
-
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
+static inline void __fpu_invalidate_fpregs_state(struct fpu *fpu)
{
- if (!use_eager_fpu())
- clts();
+ fpu->last_cpu = -1;
}
-static inline void __fpregs_deactivate_hw(void)
+static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu)
{
- if (!use_eager_fpu())
- stts();
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
}
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
-static inline void __fpregs_deactivate(struct fpu *fpu)
+/*
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own:
+ */
+static inline void fpregs_deactivate(struct fpu *fpu)
{
WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,8 +523,7 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
trace_x86_fpu_regs_deactivated(fpu);
}
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
-static inline void __fpregs_activate(struct fpu *fpu)
+static inline void fpregs_activate(struct fpu *fpu)
{
WARN_ON_FPU(fpu->fpregs_active);
@@ -554,51 +548,19 @@ static inline int fpregs_active(void)
}
/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void fpregs_activate(struct fpu *fpu)
-{
- __fpregs_activate_hw();
- __fpregs_activate(fpu);
-}
-
-static inline void fpregs_deactivate(struct fpu *fpu)
-{
- __fpregs_deactivate(fpu);
- __fpregs_deactivate_hw();
-}
-
-/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
*
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
+ * - switch_fpu_prepare() saves the old state.
+ * This is done within the context of the old process.
*
* - switch_fpu_finish() restores the new state as
* necessary.
*/
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t
-switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+static inline void
+switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->fpstate_active &&
- (use_eager_fpu() || new_fpu->counter > 5);
-
if (old_fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
@@ -608,29 +570,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
old_fpu->fpregs_active = 0;
trace_x86_fpu_regs_deactivated(old_fpu);
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new_fpu->counter++;
- __fpregs_activate(new_fpu);
- trace_x86_fpu_regs_activated(new_fpu);
- prefetch(&new_fpu->state);
- } else {
- __fpregs_deactivate_hw();
- }
- } else {
- old_fpu->counter = 0;
+ } else
old_fpu->last_cpu = -1;
- if (fpu.preload) {
- new_fpu->counter++;
- if (fpu_want_lazy_restore(new_fpu, cpu))
- fpu.preload = 0;
- else
- prefetch(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
- }
- return fpu;
}
/*
@@ -638,15 +579,19 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
*/
/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
+ * Set up the userspace FPU context for the new task, if the task
+ * has used the FPU.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
{
- if (fpu_switch.preload)
- copy_kernel_to_fpregs(&new_fpu->state);
+ bool preload = static_cpu_has(X86_FEATURE_FPU) &&
+ new_fpu->fpstate_active;
+
+ if (preload) {
+ if (!fpregs_state_valid(new_fpu, cpu))
+ copy_kernel_to_fpregs(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
}
/*
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 48df486b02f9..3c80f5b9c09d 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -322,17 +322,6 @@ struct fpu {
unsigned char fpregs_active;
/*
- * @counter:
- *
- * This counter contains the number of consecutive context switches
- * during which the FPU stays used. If this is over a threshold, the
- * lazy FPU restore logic becomes eager, to save the trap overhead.
- * This is an unsigned char so that after 256 iterations the counter
- * wraps and the context switch behavior turns lazy again; this is to
- * deal with bursty apps that only use the FPU for a short time:
- */
- unsigned char counter;
- /*
* @state:
*
* In-memory copy of all FPU registers that we save/restore
@@ -340,29 +329,6 @@ struct fpu {
* the registers in the FPU are more recent than this state
* copy. If the task context-switches away then they get
* saved here and represent the FPU state.
- *
- * After context switches there may be a (short) time period
- * during which the in-FPU hardware registers are unchanged
- * and still perfectly match this state, if the tasks
- * scheduled afterwards are not using the FPU.
- *
- * This is the 'lazy restore' window of optimization, which
- * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
- *
- * We detect whether a subsequent task uses the FPU via setting
- * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
- *
- * During this window, if the task gets scheduled again, we
- * might be able to skip having to do a restore from this
- * memory buffer to the hardware registers - at the cost of
- * incurring the overhead of #NM fault traps.
- *
- * Note that on modern CPUs that support the XSAVEOPT (or other
- * optimized XSAVE instructions), we don't use #NM traps anymore,
- * as the hardware can track whether FPU registers need saving
- * or not. On such CPUs we activate the non-lazy ('eagerfpu')
- * logic, which unconditionally saves/restores all FPU state
- * across context switches. (if FPU state exists.)
*/
union fpregs_state state;
/*
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 430bacf73074..1b2799e0699a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -21,21 +21,16 @@
/* Supervisor features */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT)
-/* Supported features which support lazy state saving */
-#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+/* All currently supported features */
+#define XCNTXT_MASK (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
- XFEATURE_MASK_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | \
- XFEATURE_MASK_BNDCSR | \
- XFEATURE_MASK_PKRU)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
+ XFEATURE_MASK_Hi16_ZMM | \
+ XFEATURE_MASK_PKRU | \
+ XFEATURE_MASK_BNDREGS | \
+ XFEATURE_MASK_BNDCSR)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h
index ef01fef3eebc..6c119cfae218 100644
--- a/arch/x86/include/asm/lguest_hcall.h
+++ b/arch/x86/include/asm/lguest_hcall.h
@@ -9,7 +9,6 @@
#define LHCALL_FLUSH_TLB 5
#define LHCALL_LOAD_IDT_ENTRY 6
#define LHCALL_SET_STACK 7
-#define LHCALL_TS 8
#define LHCALL_SET_CLOCKEVENT 9
#define LHCALL_HALT 10
#define LHCALL_SET_PMD 13
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 6108b1fada2b..1eea6ca40694 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -41,11 +41,6 @@ static inline void set_debugreg(unsigned long val, int reg)
PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val);
}
-static inline void clts(void)
-{
- PVOP_VCALL0(pv_cpu_ops.clts);
-}
-
static inline unsigned long read_cr0(void)
{
return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0);
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 3f2bc0f0d3e8..bb2de45a60f2 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -103,8 +103,6 @@ struct pv_cpu_ops {
unsigned long (*get_debugreg)(int regno);
void (*set_debugreg)(int regno, unsigned long value);
- void (*clts)(void);
-
unsigned long (*read_cr0)(void);
void (*write_cr0)(unsigned long);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 19a2224f9e16..12af3e35edfa 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -6,11 +6,6 @@
#include <asm/nops.h>
-static inline void native_clts(void)
-{
- asm volatile("clts");
-}
-
/*
* Volatile isn't enough to prevent the compiler from reordering the
* read/write functions for the control registers and messing everything up.
@@ -208,16 +203,8 @@ static inline void load_gs_index(unsigned selector)
#endif
-/* Clear the 'TS' bit */
-static inline void clts(void)
-{
- native_clts();
-}
-
#endif/* CONFIG_PARAVIRT */
-#define stts() write_cr0(read_cr0() | X86_CR0_TS)
-
static inline void clflush(volatile void *__p)
{
asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p));
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 9217ab1f5bf6..342e59789fcd 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -14,7 +14,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
__field(struct fpu *, fpu)
__field(bool, fpregs_active)
__field(bool, fpstate_active)
- __field(int, counter)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
@@ -23,17 +22,15 @@ DECLARE_EVENT_CLASS(x86_fpu,
__entry->fpu = fpu;
__entry->fpregs_active = fpu->fpregs_active;
__entry->fpstate_active = fpu->fpstate_active;
- __entry->counter = fpu->counter;
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d counter: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
__entry->fpregs_active,
__entry->fpstate_active,
- __entry->counter,
__entry->xfeatures,
__entry->xcomp_bv
)
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index aad34aafc0e0..d913047f832c 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -23,17 +23,12 @@ static double __initdata y = 3145727.0;
*/
void __init fpu__init_check_bugs(void)
{
- u32 cr0_saved;
s32 fdiv_bug;
/* kernel_fpu_begin/end() relies on patched alternative instructions. */
if (!boot_cpu_has(X86_FEATURE_FPU))
return;
- /* We might have CR0::TS set already, clear it: */
- cr0_saved = read_cr0();
- write_cr0(cr0_saved & ~X86_CR0_TS);
-
kernel_fpu_begin();
/*
@@ -56,8 +51,6 @@ void __init fpu__init_check_bugs(void)
kernel_fpu_end();
- write_cr0(cr0_saved);
-
if (fdiv_bug) {
set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV);
pr_warn("Hmm, FPU with FDIV bug\n");
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index ebb4e95fbd74..e4e97a5355ce 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -58,27 +58,9 @@ static bool kernel_fpu_disabled(void)
return this_cpu_read(in_kernel_fpu);
}
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
static bool interrupted_kernel_fpu_idle(void)
{
- if (kernel_fpu_disabled())
- return false;
-
- if (use_eager_fpu())
- return true;
-
- return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+ return !kernel_fpu_disabled();
}
/*
@@ -125,8 +107,7 @@ void __kernel_fpu_begin(void)
*/
copy_fpregs_to_fpstate(fpu);
} else {
- this_cpu_write(fpu_fpregs_owner_ctx, NULL);
- __fpregs_activate_hw();
+ __cpu_invalidate_fpregs_state();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -137,8 +118,6 @@ void __kernel_fpu_end(void)
if (fpu->fpregs_active)
copy_kernel_to_fpregs(&fpu->state);
- else
- __fpregs_deactivate_hw();
kernel_fpu_enable();
}
@@ -159,35 +138,6 @@ void kernel_fpu_end(void)
EXPORT_SYMBOL_GPL(kernel_fpu_end);
/*
- * CR0::TS save/restore functions:
- */
-int irq_ts_save(void)
-{
- /*
- * If in process context and not atomic, we can take a spurious DNA fault.
- * Otherwise, doing clts() in process context requires disabling preemption
- * or some heavy lifting like kernel_fpu_begin()
- */
- if (!in_atomic())
- return 0;
-
- if (read_cr0() & X86_CR0_TS) {
- clts();
- return 1;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(irq_ts_save);
-
-void irq_ts_restore(int TS_state)
-{
- if (TS_state)
- stts();
-}
-EXPORT_SYMBOL_GPL(irq_ts_restore);
-
-/*
* Save the FPU state (mark it for reload if necessary):
*
* This only ever gets called for the current task.
@@ -200,10 +150,7 @@ void fpu__save(struct fpu *fpu)
trace_x86_fpu_before_save(fpu);
if (fpu->fpregs_active) {
if (!copy_fpregs_to_fpstate(fpu)) {
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&fpu->state);
- else
- fpregs_deactivate(fpu);
+ copy_kernel_to_fpregs(&fpu->state);
}
}
trace_x86_fpu_after_save(fpu);
@@ -247,7 +194,6 @@ EXPORT_SYMBOL_GPL(fpstate_init);
int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
{
- dst_fpu->counter = 0;
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
@@ -260,8 +206,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Don't let 'init optimized' areas of the XSAVE area
* leak into the child task:
*/
- if (use_eager_fpu())
- memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+ memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/*
* Save current FPU registers directly into the child
@@ -283,10 +228,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memcpy(&src_fpu->state, &dst_fpu->state,
fpu_kernel_xstate_size);
- if (use_eager_fpu())
- copy_kernel_to_fpregs(&src_fpu->state);
- else
- fpregs_deactivate(src_fpu);
+ copy_kernel_to_fpregs(&src_fpu->state);
}
preempt_enable();
@@ -366,7 +308,7 @@ void fpu__activate_fpstate_write(struct fpu *fpu)
if (fpu->fpstate_active) {
/* Invalidate any lazy state: */
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
} else {
fpstate_init(&fpu->state);
trace_x86_fpu_init_state(fpu);
@@ -409,7 +351,7 @@ void fpu__current_fpstate_write_begin(void)
* ensures we will not be lazy and skip a XRSTOR in the
* future.
*/
- fpu->last_cpu = -1;
+ __fpu_invalidate_fpregs_state(fpu);
}
/*
@@ -459,7 +401,6 @@ void fpu__restore(struct fpu *fpu)
trace_x86_fpu_before_restore(fpu);
fpregs_activate(fpu);
copy_kernel_to_fpregs(&fpu->state);
- fpu->counter++;
trace_x86_fpu_after_restore(fpu);
kernel_fpu_enable();
}
@@ -477,7 +418,6 @@ EXPORT_SYMBOL_GPL(fpu__restore);
void fpu__drop(struct fpu *fpu)
{
preempt_disable();
- fpu->counter = 0;
if (fpu->fpregs_active) {
/* Ignore delayed exceptions from user space */
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 2f2b8c7ccb85..60dece392b3a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -10,18 +10,6 @@
#include <linux/init.h>
/*
- * Initialize the TS bit in CR0 according to the style of context-switches
- * we are using:
- */
-static void fpu__init_cpu_ctx_switch(void)
-{
- if (!boot_cpu_has(X86_FEATURE_EAGER_FPU))
- stts();
- else
- clts();
-}
-
-/*
* Initialize the registers found in all CPUs, CR0 and CR4:
*/
static void fpu__init_cpu_generic(void)
@@ -58,7 +46,6 @@ void fpu__init_cpu(void)
{
fpu__init_cpu_generic();
fpu__init_cpu_xstate();
- fpu__init_cpu_ctx_switch();
}
/*
@@ -233,82 +220,16 @@ static void __init fpu__init_system_xstate_size_legacy(void)
}
/*
- * FPU context switching strategies:
- *
- * Against popular belief, we don't do lazy FPU saves, due to the
- * task migration complications it brings on SMP - we only do
- * lazy FPU restores.
- *
- * 'lazy' is the traditional strategy, which is based on setting
- * CR0::TS to 1 during context-switch (instead of doing a full
- * restore of the FPU state), which causes the first FPU instruction
- * after the context switch (whenever it is executed) to fault - at
- * which point we lazily restore the FPU state into FPU registers.
- *
- * Tasks are of course under no obligation to execute FPU instructions,
- * so it can easily happen that another context-switch occurs without
- * a single FPU instruction being executed. If we eventually switch
- * back to the original task (that still owns the FPU) then we have
- * not only saved the restores along the way, but we also have the
- * FPU ready to be used for the original task.
- *
- * 'lazy' is deprecated because it's almost never a performance win
- * and it's much more complicated than 'eager'.
- *
- * 'eager' switching is by default on all CPUs, there we switch the FPU
- * state during every context switch, regardless of whether the task
- * has used FPU instructions in that time slice or not. This is done
- * because modern FPU context saving instructions are able to optimize
- * state saving and restoration in hardware: they can detect both
- * unused and untouched FPU state and optimize accordingly.
- *
- * [ Note that even in 'lazy' mode we might optimize context switches
- * to use 'eager' restores, if we detect that a task is using the FPU
- * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
- */
-static enum { ENABLE, DISABLE } eagerfpu = ENABLE;
-
-/*
* Find supported xfeatures based on cpu features and command-line input.
* This must be called after fpu__init_parse_early_param() is called and
* xfeatures_mask is enumerated.
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
- /* Support all xfeatures known to us */
- if (eagerfpu != DISABLE)
- return XCNTXT_MASK;
-
- /* Warning of xfeatures being disabled for no eagerfpu mode */
- if (xfeatures_mask & XFEATURE_MASK_EAGER) {
- pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
- xfeatures_mask & XFEATURE_MASK_EAGER);
- }
-
- /* Return a mask that masks out all features requiring eagerfpu mode */
- return ~XFEATURE_MASK_EAGER;
+ return XCNTXT_MASK;
}
-/*
- * Disable features dependent on eagerfpu.
- */
-static void __init fpu__clear_eager_fpu_features(void)
-{
- setup_clear_cpu_cap(X86_FEATURE_MPX);
-}
-
-/*
- * Pick the FPU context switching strategy:
- *
- * When eagerfpu is AUTO or ENABLE, we ensure it is ENABLE if either of