diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-23 17:54:58 +0100 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-23 17:54:58 +0100 |
commit | f682a7920baf7b721d01dd317f3b532265357cbb (patch) | |
tree | 253e96be2249d704424b153c58ba58c974a9e61d /arch/x86 | |
parent | 99792e0cea1ed733cdc8d0758677981e0cbebfed (diff) | |
parent | 3a025de64bf89c84a79909069e3c24ad9e710d27 (diff) |
Merge branch 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 paravirt updates from Ingo Molnar:
"Two main changes:
- Remove no longer used parts of the paravirt infrastructure and put
large quantities of paravirt ops under a new config option
PARAVIRT_XXL=y, which is selected by XEN_PV only. (Joergen Gross)
- Enable PV spinlocks on Hyperv (Yi Sun)"
* 'x86-paravirt-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/hyperv: Enable PV qspinlock for Hyper-V
x86/hyperv: Add GUEST_IDLE_MSR support
x86/paravirt: Clean up native_patch()
x86/paravirt: Prevent redefinition of SAVE_FLAGS macro
x86/xen: Make xen_reservation_lock static
x86/paravirt: Remove unneeded mmu related paravirt ops bits
x86/paravirt: Move the Xen-only pv_mmu_ops under the PARAVIRT_XXL umbrella
x86/paravirt: Move the pv_irq_ops under the PARAVIRT_XXL umbrella
x86/paravirt: Move the Xen-only pv_cpu_ops under the PARAVIRT_XXL umbrella
x86/paravirt: Move items in pv_info under PARAVIRT_XXL umbrella
x86/paravirt: Introduce new config option PARAVIRT_XXL
x86/paravirt: Remove unused paravirt bits
x86/paravirt: Use a single ops structure
x86/paravirt: Remove clobbers from struct paravirt_patch_site
x86/paravirt: Remove clobbers parameter from paravirt patch functions
x86/paravirt: Make paravirt_patch_call() and paravirt_patch_jmp() static
x86/xen: Add SPDX identifier in arch/x86/xen files
x86/xen: Link platform-pci-unplug.o only if CONFIG_XEN_PVHVM
x86/xen: Move pv specific parts of arch/x86/xen/mmu.c to mmu_pv.c
x86/xen: Move pv irq related functions under CONFIG_XEN_PV umbrella
Diffstat (limited to 'arch/x86')
58 files changed, 909 insertions, 912 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ff425a2d286c..46d42af4501a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -523,6 +523,7 @@ config X86_VSMP bool "ScaleMP vSMP" select HYPERVISOR_GUEST select PARAVIRT + select PARAVIRT_XXL depends on X86_64 && PCI depends on X86_EXTENDED_PLATFORM depends on SMP @@ -753,6 +754,9 @@ config PARAVIRT over full virtualization. However, when run without a hypervisor the kernel is theoretically slower and slightly larger. +config PARAVIRT_XXL + bool + config PARAVIRT_DEBUG bool "paravirt-ops debugging" depends on PARAVIRT && DEBUG_KERNEL diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index a423bdb42686..a1d5918765f3 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -9,6 +9,7 @@ * paravirt and debugging variants are added.) */ #undef CONFIG_PARAVIRT +#undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index fbbf1ba57ec6..687e47f8a796 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -783,7 +783,7 @@ GLOBAL(__begin_SYSENTER_singlestep_region) * will ignore all of the single-step traps generated in this range. */ -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV /* * Xen doesn't set %esp to be precisely what the normal SYSENTER * entry point expects, so fix it up before using the normal path. @@ -1241,7 +1241,7 @@ ENTRY(spurious_interrupt_bug) jmp common_exception END(spurious_interrupt_bug) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV ENTRY(xen_hypervisor_callback) pushl $-1 /* orig_ax = -1 => not a system call */ SAVE_ALL @@ -1322,11 +1322,13 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(3b, 8b) _ASM_EXTABLE(4b, 9b) ENDPROC(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f95dcb209fdf..7c5ce0a6c4d2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1050,7 +1050,7 @@ ENTRY(do_softirq_own_stack) ret ENDPROC(do_softirq_own_stack) -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0 /* @@ -1130,11 +1130,13 @@ ENTRY(xen_failsafe_callback) ENCODE_FRAME_POINTER jmp error_exit END(xen_failsafe_callback) +#endif /* CONFIG_XEN_PV */ +#ifdef CONFIG_XEN_PVHVM apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall +#endif -#endif /* CONFIG_XEN */ #if IS_ENABLED(CONFIG_HYPERV) apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ @@ -1151,7 +1153,7 @@ idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK idtentry int3 do_int3 has_error_code=0 idtentry stack_segment do_stack_segment has_error_code=1 -#ifdef CONFIG_XEN +#ifdef CONFIG_XEN_PV idtentry xennmi do_nmi has_error_code=0 idtentry xendebug do_debug has_error_code=0 idtentry xenint3 do_int3 has_error_code=0 diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index b21ee65c4101..1c11f9420a82 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1,2 +1,6 @@ obj-y := hv_init.o mmu.o nested.o obj-$(CONFIG_X86_64) += hv_apic.o + +ifdef CONFIG_X86_64 +obj-$(CONFIG_PARAVIRT_SPINLOCKS) += hv_spinlock.o +endif diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c new file mode 100644 index 000000000000..a861b0456b1a --- /dev/null +++ b/arch/x86/hyperv/hv_spinlock.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific spinlock code. + * + * Copyright (C) 2018, Intel, Inc. + * + * Author : Yi Sun <yi.y.sun@intel.com> + */ + +#define pr_fmt(fmt) "Hyper-V: " fmt + +#include <linux/spinlock.h> + +#include <asm/mshyperv.h> +#include <asm/paravirt.h> +#include <asm/apic.h> + +static bool __initdata hv_pvspin = true; + +static void hv_qlock_kick(int cpu) +{ + apic->send_IPI(cpu, X86_PLATFORM_IPI_VECTOR); +} + +static void hv_qlock_wait(u8 *byte, u8 val) +{ + unsigned long msr_val; + unsigned long flags; + + if (in_nmi()) + return; + + /* + * Reading HV_X64_MSR_GUEST_IDLE MSR tells the hypervisor that the + * vCPU can be put into 'idle' state. This 'idle' state is + * terminated by an IPI, usually from hv_qlock_kick(), even if + * interrupts are disabled on the vCPU. + * + * To prevent a race against the unlock path it is required to + * disable interrupts before accessing the HV_X64_MSR_GUEST_IDLE + * MSR. Otherwise, if the IPI from hv_qlock_kick() arrives between + * the lock value check and the rdmsrl() then the vCPU might be put + * into 'idle' state by the hypervisor and kept in that state for + * an unspecified amount of time. + */ + local_irq_save(flags); + /* + * Only issue the rdmsrl() when the lock state has not changed. + */ + if (READ_ONCE(*byte) == val) + rdmsrl(HV_X64_MSR_GUEST_IDLE, msr_val); + local_irq_restore(flags); +} + +/* + * Hyper-V does not support this so far. + */ +bool hv_vcpu_is_preempted(int vcpu) +{ + return false; +} +PV_CALLEE_SAVE_REGS_THUNK(hv_vcpu_is_preempted); + +void __init hv_init_spinlocks(void) +{ + if (!hv_pvspin || !apic || + !(ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) || + !(ms_hyperv.features & HV_X64_MSR_GUEST_IDLE_AVAILABLE)) { + pr_info("PV spinlocks disabled\n"); + return; + } + pr_info("PV spinlocks enabled\n"); + + __pv_init_lock_hash(); + pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath; + pv_ops.lock.queued_spin_unlock = PV_CALLEE_SAVE(__pv_queued_spin_unlock); + pv_ops.lock.wait = hv_qlock_wait; + pv_ops.lock.kick = hv_qlock_kick; + pv_ops.lock.vcpu_is_preempted = PV_CALLEE_SAVE(hv_vcpu_is_preempted); +} + +static __init int hv_parse_nopvspin(char *arg) +{ + hv_pvspin = false; + return 0; +} +early_param("hv_nopvspin", hv_parse_nopvspin); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index ef5f29f913d7..e65d7fe6489f 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -231,6 +231,6 @@ void hyperv_setup_mmu_ops(void) return; pr_info("Using hypercall for remote TLB flush\n"); - pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others; - pv_mmu_ops.tlb_remove_table = tlb_remove_table; + pv_ops.mmu.flush_tlb_others = hyperv_flush_tlb_others; + pv_ops.mmu.tlb_remove_table = tlb_remove_table; } diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 4505ac2735ad..9e5ca30738e5 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -8,7 +8,7 @@ DECLARE_PER_CPU(unsigned long, cpu_dr7); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL /* * These special macros can be used to get or set a debugging register */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 13c5ee878a47..68a99d2a5f33 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -108,7 +108,7 @@ static inline int desc_empty(const void *ptr) return !(desc[0] | desc[1]); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #define load_TR_desc() native_load_tr_desc() @@ -134,7 +134,7 @@ static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) { } -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 6390bd8c141b..50ba74a34a37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -162,7 +162,7 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags); -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) { diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 00e01d215f74..4139f7650fe5 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -38,6 +38,8 @@ #define HV_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) /* Partition reference TSC MSR is available */ #define HV_MSR_REFERENCE_TSC_AVAILABLE (1 << 9) +/* Partition Guest IDLE MSR is available */ +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE (1 << 10) /* A partition's reference time stamp counter (TSC) page */ #define HV_X64_MSR_REFERENCE_TSC 0x40000021 @@ -246,6 +248,9 @@ #define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 +/* Hyper-V guest idle MSR */ +#define HV_X64_MSR_GUEST_IDLE 0x400000F0 + /* Hyper-V guest crash notification MSR's */ #define HV_X64_MSR_CRASH_P0 0x40000100 #define HV_X64_MSR_CRASH_P1 0x40000101 diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 15450a675031..058e40fed167 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -64,7 +64,7 @@ static inline __cpuidle void native_halt(void) #endif -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #ifndef __ASSEMBLY__ @@ -123,6 +123,10 @@ static inline notrace unsigned long arch_local_irq_save(void) #define DISABLE_INTERRUPTS(x) cli #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_ENTRY +#define SAVE_FLAGS(x) pushfq; popq %rax +#endif + #define SWAPGS swapgs /* * Currently paravirt can't handle swapgs nicely when we @@ -135,8 +139,6 @@ static inline notrace unsigned long arch_local_irq_save(void) */ #define SWAPGS_UNSAFE_STACK swapgs -#define PARAVIRT_ADJUST_EXCEPTION_FRAME /* */ - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -145,18 +147,12 @@ static inline notrace unsigned long arch_local_irq_save(void) swapgs; \ sysretl -#ifdef CONFIG_DEBUG_ENTRY -#define SAVE_FLAGS(x) pushfq; popq %rax -#endif #else #define INTERRUPT_RETURN iret -#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit -#define GET_CR0_INTO_EAX movl %cr0, %eax #endif - #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_PARAVIRT */ +#endif /* CONFIG_PARAVIRT_XXL */ #ifndef __ASSEMBLY__ static inline int arch_irqs_disabled_flags(unsigned long flags) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index eeeb9289c764..0ca50611e8ce 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,12 +16,12 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT +#ifndef CONFIG_PARAVIRT_XXL static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) { } -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ #ifdef CONFIG_PERF_EVENTS diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f37704497d8f..0d6271cce198 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -351,6 +351,8 @@ int hyperv_flush_guest_mapping(u64 as); #ifdef CONFIG_X86_64 void hv_apic_init(void); +void __init hv_init_spinlocks(void); +bool hv_vcpu_is_preempted(int vcpu); #else static inline void hv_apic_init(void) {} #endif diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 04addd6e0a4a..91e4cf189914 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -242,7 +242,7 @@ static inline unsigned long long native_read_pmc(int counter) return EAX_EDX_VAL(val, low, high); } -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL #include <asm/paravirt.h> #else #include <linux/errno.h> @@ -305,7 +305,7 @@ do { \ #define rdpmcl(counter, val) ((val) = native_read_pmc(counter)) -#endif /* !CONFIG_PARAVIRT */ +#endif /* !CONFIG_PARAVIRT_XXL */ /* * 64-bit version of wrmsr_safe(): diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index e375d4266b53..4bf42f9e4eea 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -17,16 +17,73 @@ #include <linux/cpumask.h> #include <asm/frame.h> +static inline unsigned long long paravirt_sched_clock(void) +{ + return PVOP_CALL0(unsigned long long, time.sched_clock); +} + +struct static_key; +extern struct static_key paravirt_steal_enabled; +extern struct static_key paravirt_steal_rq_enabled; + +static inline u64 paravirt_steal_clock(int cpu) +{ + return PVOP_CALL1(u64, time.steal_clock, cpu); +} + +/* The paravirtualized I/O functions */ +static inline void slow_down_io(void) +{ + pv_ops.cpu.io_delay(); +#ifdef REALLY_SLOW_IO + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); + pv_ops.cpu.io_delay(); +#endif +} + +static inline void __flush_tlb(void) +{ + PVOP_VCALL0(mmu.flush_tlb_user); +} + +static inline void __flush_tlb_global(void) +{ + PVOP_VCALL0(mmu.flush_tlb_kernel); +} + +static inline void __flush_tlb_one_user(unsigned long addr) +{ + PVOP_VCALL1(mmu.flush_tlb_one_user, addr); +} + +static inline void flush_tlb_others(const struct cpumask *cpumask, + const struct flush_tlb_info *info) +{ + PVOP_VCALL2(mmu.flush_tlb_others, cpumask, info); +} + +static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + PVOP_VCALL2(mmu.tlb_remove_table, tlb, table); +} + +static inline void paravirt_arch_exit_mmap(struct mm_struct *mm) +{ + PVOP_VCALL1(mmu.exit_mmap, mm); +} + +#ifdef CONFIG_PARAVIRT_XXL static inline void load_sp0(unsigned long sp0) { - PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0); + PVOP_VCALL1(cpu.load_sp0, sp0); } /* The paravirtualized CPUID instruction. */ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { - PVOP_VCALL4(pv_cpu_ops.cpuid, eax, ebx, ecx, edx); + PVOP_VCALL4(cpu.cpuid, eax, ebx, ecx, edx); } /* @@ -34,98 +91,98 @@ static inline void __cpuid(unsigned int *eax, unsigned int *ebx, */ static inline unsigned long paravirt_get_debugreg(int reg) { - return PVOP_CALL1(unsigned long, pv_cpu_ops.get_debugreg, reg); + return PVOP_CALL1(unsigned long, cpu.get_debugreg, reg); } #define get_debugreg(var, reg) var = paravirt_get_debugreg(reg) static inline void set_debugreg(unsigned long val, int reg) { - PVOP_VCALL2(pv_cpu_ops.set_debugreg, reg, val); + PVOP_VCALL2(cpu.set_debugreg, reg, val); } static inline unsigned long read_cr0(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr0); + return PVOP_CALL0(unsigned long, cpu.read_cr0); } static inline void write_cr0(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr0, x); + PVOP_VCALL1(cpu.write_cr0, x); } static inline unsigned long read_cr2(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr2); + return PVOP_CALL0(unsigned long, mmu.read_cr2); } static inline void write_cr2(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr2, x); + PVOP_VCALL1(mmu.write_cr2, x); } static inline unsigned long __read_cr3(void) { - return PVOP_CALL0(unsigned long, pv_mmu_ops.read_cr3); + return PVOP_CALL0(unsigned long, mmu.read_cr3); } static inline void write_cr3(unsigned long x) { - PVOP_VCALL1(pv_mmu_ops.write_cr3, x); + PVOP_VCALL1(mmu.write_cr3, x); } static inline void __write_cr4(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr4, x); + PVOP_VCALL1(cpu.write_cr4, x); } #ifdef CONFIG_X86_64 static inline unsigned long read_cr8(void) { - return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr8); + return PVOP_CALL0(unsigned long, cpu.read_cr8); } static inline void write_cr8(unsigned long x) { - PVOP_VCALL1(pv_cpu_ops.write_cr8, x); + PVOP_VCALL1(cpu.write_cr8, x); } #endif static inline void arch_safe_halt(void) { - PVOP_VCALL0(pv_irq_ops.safe_halt); + PVOP_VCALL0(irq.safe_halt); } static inline void halt(void) { - PVOP_VCALL0(pv_irq_ops.halt); + PVOP_VCALL0(irq.halt); } static inline void wbinvd(void) { - PVOP_VCALL0(pv_cpu_ops.wbinvd); + PVOP_VCALL0(cpu.wbinvd); } #define get_kernel_rpl() (pv_info.kernel_rpl) static inline u64 paravirt_read_msr(unsigned msr) { - return PVOP_ |