diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2017-10-12 11:02:50 +0200 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2017-10-12 11:02:50 +0200 |
commit | 331b57d14829c49d75076779cdc54d7e4537bbf0 (patch) | |
tree | 79a70eadb04ed2fd65d3280021ec6ad5266d1cc1 /arch/x86 | |
parent | 79761ce80aa0232157e428bde28c0cef6d43ac5f (diff) | |
parent | e43b3b58548051f8809391eb7bec7a27ed3003ea (diff) |
Merge branch 'irq/urgent' into x86/apic
Pick up core changes which affect the vector rework.
Diffstat (limited to 'arch/x86')
38 files changed, 561 insertions, 494 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 829e89cfcee2..9fb9a1f1e47b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4409,10 +4409,9 @@ static __init int fixup_ht_bug(void) return 0; } - if (lockup_detector_suspend() != 0) { - pr_debug("failed to disable PMU erratum BJ122, BV98, HSD29 workaround\n"); - return 0; - } + cpus_read_lock(); + + hardlockup_detector_perf_stop(); x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED); @@ -4420,9 +4419,7 @@ static __init int fixup_ht_bug(void) x86_pmu.commit_scheduling = NULL; x86_pmu.stop_scheduling = NULL; - lockup_detector_resume(); - - cpus_read_lock(); + hardlockup_detector_perf_restart(); for_each_online_cpu(c) free_excl_cntrs(c); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 4cf100ff2a37..72db0664a53d 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -552,6 +552,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_X, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), @@ -560,6 +561,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT, glm_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_DENVERTON, glm_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GEMINI_LAKE, glm_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 8e2457cb6b4a..005908ee9333 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -775,6 +775,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_DENVERTON, hsw_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GEMINI_LAKE, hsw_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index db1fe377e6dd..a7196818416a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3462,7 +3462,7 @@ static struct intel_uncore_ops skx_uncore_iio_ops = { static struct intel_uncore_type skx_uncore_iio = { .name = "iio", .num_counters = 4, - .num_boxes = 5, + .num_boxes = 6, .perf_ctr_bits = 48, .event_ctl = SKX_IIO0_MSR_PMON_CTL0, .perf_ctr = SKX_IIO0_MSR_PMON_CTR0, @@ -3492,7 +3492,7 @@ static const struct attribute_group skx_uncore_format_group = { static struct intel_uncore_type skx_uncore_irp = { .name = "irp", .num_counters = 2, - .num_boxes = 5, + .num_boxes = 6, .perf_ctr_bits = 48, .event_ctl = SKX_IRP0_MSR_PMON_CTL0, .perf_ctr = SKX_IRP0_MSR_PMON_CTR0, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 4bb3ec69e8ea..06723671ae4e 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -63,6 +63,14 @@ static bool test_intel(int idx) case INTEL_FAM6_ATOM_SILVERMONT1: case INTEL_FAM6_ATOM_SILVERMONT2: case INTEL_FAM6_ATOM_AIRMONT: + + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_ATOM_DENVERTON: + + case INTEL_FAM6_ATOM_GEMINI_LAKE: + + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: if (idx == PERF_MSR_SMI) return true; break; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index e0bb46c02857..0e2a5edbce00 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -231,7 +231,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (fpu->fpstate_active) { + if (fpu->initialized) { unsigned long fx_aligned, math_size; sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index c1eadbaf1115..b0dc91f4bedc 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -11,10 +11,12 @@ # define __ASM_FORM_COMMA(x) " " #x "," #endif -#ifdef CONFIG_X86_32 +#ifndef __x86_64__ +/* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else +/* 64 bit */ # define __ASM_SEL(a,b) __ASM_FORM(b) # define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif @@ -139,8 +141,8 @@ * gets set up by the containing function. If you forget to do this, objtool * may print a "call without frame pointer save/setup" warning. */ -register unsigned int __asm_call_sp asm("esp"); -#define ASM_CALL_CONSTRAINT "+r" (__asm_call_sp) +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) #endif #endif /* _ASM_X86_ASM_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 554cdb205d17..e3221ffa304e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -23,11 +23,9 @@ /* * High level FPU state handling functions: */ -extern void fpu__activate_curr(struct fpu *fpu); -extern void fpu__activate_fpstate_read(struct fpu *fpu); -extern void fpu__activate_fpstate_write(struct fpu *fpu); -extern void fpu__current_fpstate_write_begin(void); -extern void fpu__current_fpstate_write_end(void); +extern void fpu__initialize(struct fpu *fpu); +extern void fpu__prepare_read(struct fpu *fpu); +extern void fpu__prepare_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); @@ -120,20 +118,11 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) -#define check_insn(insn, output, input...) \ -({ \ - int err; \ +#define kernel_insn(insn, output, input...) \ asm volatile("1:" #insn "\n\t" \ "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_fprestore) \ + : output : input) static inline int copy_fregs_to_user(struct fregs_state __user *fx) { @@ -153,20 +142,16 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { - int err; - if (IS_ENABLED(CONFIG_X86_32)) { - err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { if (IS_ENABLED(CONFIG_AS_FXSAVEQ)) { - err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); } else { /* See comment in copy_fxregs_to_kernel() below. */ - err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + kernel_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); } } - /* Copying from a kernel buffer to FPU registers should never fail: */ - WARN_ON_FPU(err); } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) @@ -183,9 +168,7 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) static inline void copy_kernel_to_fregs(struct fregs_state *fx) { - int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - - WARN_ON_FPU(err); + kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } static inline int copy_user_to_fregs(struct fregs_state __user *fx) @@ -281,18 +264,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * Use XRSTORS to restore context if it is enabled. XRSTORS supports compact * XSAVE area format. */ -#define XSTATE_XRESTORE(st, lmask, hmask, err) \ +#define XSTATE_XRESTORE(st, lmask, hmask) \ asm volatile(ALTERNATIVE(XRSTOR, \ XRSTORS, X86_FEATURE_XSAVES) \ "\n" \ - "xor %[err], %[err]\n" \ "3:\n" \ - ".pushsection .fixup,\"ax\"\n" \ - "4: movl $-2, %[err]\n" \ - "jmp 3b\n" \ - ".popsection\n" \ - _ASM_EXTABLE(661b, 4b) \ - : [err] "=r" (err) \ + _ASM_EXTABLE_HANDLE(661b, 3b, ex_handler_fprestore)\ + : \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") @@ -336,7 +314,10 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) else XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ + /* + * We should never fault when copying from a kernel buffer, and the FPU + * state we set at boot time should be valid. + */ WARN_ON_FPU(err); } @@ -350,7 +331,7 @@ static inline void copy_xregs_to_kernel(struct xregs_state *xstate) u32 hmask = mask >> 32; int err; - WARN_ON(!alternatives_patched); + WARN_ON_FPU(!alternatives_patched); XSTATE_XSAVE(xstate, lmask, hmask, err); @@ -365,12 +346,8 @@ static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; - int err; - - XSTATE_XRESTORE(xstate, lmask, hmask, err); - /* We should never fault when copying from a kernel buffer: */ - WARN_ON_FPU(err); + XSTATE_XRESTORE(xstate, lmask, hmask); } /* @@ -526,38 +503,17 @@ static inline int fpregs_state_valid(struct fpu *fpu, unsigned int cpu) */ static inline void fpregs_deactivate(struct fpu *fpu) { - WARN_ON_FPU(!fpu->fpregs_active); - - fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); trace_x86_fpu_regs_deactivated(fpu); } static inline void fpregs_activate(struct fpu *fpu) { - WARN_ON_FPU(fpu->fpregs_active); - - fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); trace_x86_fpu_regs_activated(fpu); } /* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. - */ -static inline int fpregs_active(void) -{ - return current->thread.fpu.fpregs_active; -} - -/* * FPU state switching for scheduling. * * This is a two-stage process: @@ -571,14 +527,13 @@ static inline int fpregs_active(void) static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) { - if (old_fpu->fpregs_active) { + if (old_fpu->initialized) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else old_fpu->last_cpu = cpu; /* But leave fpu_fpregs_owner_ctx! */ - old_fpu->fpregs_active = 0; trace_x86_fpu_regs_deactivated(old_fpu); } else old_fpu->last_cpu = -1; @@ -595,7 +550,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu) { bool preload = static_cpu_has(X86_FEATURE_FPU) && - new_fpu->fpstate_active; + new_fpu->initialized; if (preload) { if (!fpregs_state_valid(new_fpu, cpu)) @@ -617,8 +572,7 @@ static inline void user_fpu_begin(void) struct fpu *fpu = ¤t->thread.fpu; preempt_disable(); - if (!fpregs_active()) - fpregs_activate(fpu); + fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3c80f5b9c09d..a1520575d86b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -68,6 +68,9 @@ struct fxregs_state { /* Default value for fxregs_state.mxcsr: */ #define MXCSR_DEFAULT 0x1f80 +/* Copy both mxcsr & mxcsr_flags with a single u64 memcpy: */ +#define MXCSR_AND_FLAGS_SIZE sizeof(u64) + /* * Software based FPU emulation state. This is arbitrary really, * it matches the x87 format to make it easier to understand: @@ -290,36 +293,13 @@ struct fpu { unsigned int last_cpu; /* - * @fpstate_active: + * @initialized: * - * This flag indicates whether this context is active: if the task + * This flag indicates whether this context is initialized: if the task * is not running then we can restore from this context, if the task * is running then we should save into this context. */ - unsigned char fpstate_active; - - /* - * @fpregs_active: - * - * This flag determines whether a given context is actively - * loaded into the FPU's registers and that those registers - * represent the task's current FPU state. - * - * Note the interaction with fpstate_active: - * - * # task does not use the FPU: - * fpstate_active == 0 - * - * # task uses the FPU and regs are active: - * fpstate_active == 1 && fpregs_active == 1 - * - * # the regs are inactive but still match fpstate: - * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu - * - * The third state is what we use for the lazy restore optimization - * on lazy-switching CPUs. - */ - unsigned char fpregs_active; + unsigned char initialized; /* * @state: diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 1b2799e0699a..83fee2469eb7 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -48,8 +48,12 @@ void fpu__xstate_clear_all_cpu_caps(void); void *get_xsave_addr(struct xregs_state *xsave, int xstate); const void *get_xsave_field_ptr(int xstate_field); int using_compacted_format(void); -int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave); -int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave); +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); +int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); +int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); + +/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ +extern int validate_xstate_header(const struct xstate_header *hdr); + #endif diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index bc62e7cbf1b1..59ad3d132353 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,7 +88,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); void __init kvm_guest_init(void); -void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_pf_reason(void); extern void kvm_disable_steal_time(void); @@ -103,7 +103,7 @@ static inline void kvm_spinlock_init(void) #else /* CONFIG_KVM_GUEST */ #define kvm_guest_init() do {} while (0) -#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wait(T, I) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 5161da1a0fa0..89e7eeb5cec1 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -158,17 +158,6 @@ struct thread_info { */ #ifndef __ASSEMBLY__ -static inline unsigned long current_stack_pointer(void) -{ - unsigned long sp; -#ifdef CONFIG_X86_64 - asm("mov %%rsp,%0" : "=g" (sp)); -#else - asm("mov %%esp,%0" : "=g" (sp)); -#endif - return sp; -} - /* * Walks up the stack frames to make sure that the specified object is * entirely contained by a single stack frame. diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h index 342e59789fcd..39f7a27bef13 100644 --- a/arch/x86/include/asm/trace/fpu.h +++ b/arch/x86/include/asm/trace/fpu.h @@ -12,25 +12,22 @@ DECLARE_EVENT_CLASS(x86_fpu, TP_STRUCT__entry( __field(struct fpu *, fpu) - __field(bool, fpregs_active) - __field(bool, fpstate_active) + __field(bool, initialized) __field(u64, xfeatures) __field(u64, xcomp_bv) ), TP_fast_assign( __entry->fpu = fpu; - __entry->fpregs_active = fpu->fpregs_active; - __entry->fpstate_active = fpu->fpstate_active; + __entry->initialized = fpu->initialized; if (boot_cpu_has(X86_FEATURE_OSXSAVE)) { __entry->xfeatures = fpu->state.xsave.header.xfeatures; __entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv; } ), - TP_printk("x86/fpu: %p fpregs_active: %d fpstate_active: %d xfeatures: %llx xcomp_bv: %llx", + TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx", __entry->fpu, - __entry->fpregs_active, - __entry->fpstate_active, + __entry->initialized, __entry->xfeatures, __entry->xcomp_bv ) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 78e8fcc87d4c..4b892917edeb 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -337,7 +337,7 @@ do { \ _ASM_EXTABLE(1b, 4b) \ _ASM_EXTABLE(2b, 4b) \ : "=r" (retval), "=&A"(x) \ - : "m" (__m(__ptr)), "m" __m(((u32 *)(__ptr)) + 1), \ + : "m" (__m(__ptr)), "m" __m(((u32 __user *)(__ptr)) + 1), \ "i" (errret), "0" (retval)); \ }) diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 128a1a0b1450..7cb282e9e587 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -551,13 +551,13 @@ static inline void MULTI_update_descriptor(struct multicall_entry *mcl, u64 maddr, struct desc_struct desc) { - u32 *p = (u32 *) &desc; - mcl->op = __HYPERVISOR_update_descriptor; if (sizeof(maddr) == sizeof(long)) { mcl->args[0] = maddr; mcl->args[1] = *(unsigned long *)&desc; } else { + u32 *p = (u32 *)&desc; + mcl->args[0] = maddr; mcl->args[1] = maddr >> 32; mcl->args[2] = *p++; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e1114f070c2d..f92a6593de1e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->fpregs_active) { + if (fpu->initialized) { /* * Ignore return value -- we don't care if reg state * is clobbered. @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->fpregs_active) + if (fpu->initialized) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); trace_x86_fpu_before_save(fpu); - if (fpu->fpregs_active) { + if (fpu->initialized) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } @@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init); int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) + if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * In lazy mode, if the FPU context isn't loaded into - * fpregs, CR0.TS will be set and do_device_not_available - * will load the FPU context. * - * We have to do all this with preemption disabled, - * mostly because of the FNSAVE case, because in that - * case we must not allow preemption in the window - * between the FNSAVE and us marking the context lazy. - * - * It shouldn't be an issue as even FNSAVE is plenty - * fast in terms of critical section length. + * ( The function 'fails' in the FNSAVE case, which destroys + * register contents so we have to copy them back. ) */ - preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, - fpu_kernel_xstate_size); - + memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); copy_kernel_to_fpregs(&src_fpu->state); } - preempt_enable(); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__activate_curr(struct fpu *fpu) +void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } -EXPORT_SYMBOL_GPL(fpu__activate_curr); +EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. * - * If the task has not used the FPU before then initialize its - * fpstate. + * There's two cases where this gets called: + * + * - for the current task (when coredumping), in which case we have + * to save the latest FPU registers into the fpstate, + * + * - or it's called for stopped tasks (ptrace), in which case the + * registers were already saved by the context-switch code when + * the task scheduled out - we only have to initialize the registers + * if they've never been initialized. * * If the task has used the FPU before then save it. */ -void fpu__activate_fpstate_read(struct fpu *fpu) +void fpu__prepare_read(struct fpu *fpu) { - /* - * If fpregs are active (in the current CPU), then - * copy them to the fpstate: - */ - if (fpu->fpregs_active) { + if (fpu == ¤t->thread.fpu) { fpu__save(fpu); } else { - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } } @@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu) /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then unlazy it. + * If the task has used the FPU before then invalidate any cached FPU registers. * If the task has not used the FPU before then initialize its fpstate. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we - * didn't clear its lazy status here then the lazy in-registers + * didn't clear its cached status here then the cached in-registers * state pending on its former CPU could be restored, corrupting * the modifications. */ -void fpu__activate_fpstate_write(struct fpu *fpu) +void fpu__prepare_write(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU @@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->fpstate_active) { - /* Invalidate any lazy state: */ + if (fpu->initialized) { + /* Invalidate any cached state: */ __fpu_invalidate_fpregs_state(fpu); } else { fpstate_init(&fpu->state); @@ -310,74 +300,11 @@ void fpu__activate_fpstate_write(struct fpu *fpu) trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } /* - * This function must be called before we write the current - * task's fpstate. - * - * This call gets the current FPU register state and moves - * it in to the 'fpstate'. Preemption is disabled so that - * no writes to the 'fpstate' can occur from context - * swiches. - * - * Must be followed by a fpu__current_fpstate_write_end(). - */ -void fpu__current_fpstate_write_begin(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - |