diff options
author | Ingo Molnar <mingo@kernel.org> | 2016-11-23 07:18:09 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2016-11-23 07:18:09 +0100 |
commit | 064e6a8ba61a751625478f656c6f76a6f37a009e (patch) | |
tree | a22d84d037543f74b6c86969c7e87ab799768de9 /arch/x86 | |
parent | af25ed59b5616b389d90877f7085dc5d457a3d49 (diff) | |
parent | 23400ac997062647f2b63c82030d189671b1effe (diff) |
Merge branch 'linus' into x86/fpu, to resolve conflicts
Conflicts:
arch/x86/kernel/fpu/core.c
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86')
26 files changed, 330 insertions, 194 deletions
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 536ccfcc01c6..34d9e15857c3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -40,8 +40,8 @@ GCOV_PROFILE := n UBSAN_SANITIZE :=n LDFLAGS := -m elf_$(UTS_MACHINE) -ifeq ($(CONFIG_RELOCATABLE),y) -# If kernel is relocatable, build compressed kernel as PIE. +# Compressed kernel should be built as PIE since it may be loaded at any +# address by the bootloader. ifeq ($(CONFIG_X86_32),y) LDFLAGS += $(call ld-option, -pie) $(call ld-option, --no-dynamic-linker) else @@ -51,7 +51,6 @@ else LDFLAGS += $(shell $(LD) --help 2>&1 | grep -q "\-z noreloc-overflow" \ && echo "-z noreloc-overflow -pie --no-dynamic-linker") endif -endif LDFLAGS_vmlinux := -T hostprogs-y := mkpiggy diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 26240dde081e..4224ede43b4e 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c @@ -87,6 +87,12 @@ int validate_cpu(void) return -1; } + if (CONFIG_X86_MINIMUM_CPU_FAMILY <= 4 && !IS_ENABLED(CONFIG_M486) && + !has_eflag(X86_EFLAGS_ID)) { + printf("This kernel requires a CPU with the CPUID instruction. Build with CONFIG_M486=y to run on this CPU.\n"); + return -1; + } + if (err_flags) { puts("This kernel requires the following features " "not present on the CPU:\n"); diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 0ab5ee1c26af..aa8b0672f87a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -888,7 +888,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) unsigned long auth_tag_len = crypto_aead_authsize(tfm); u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; /* Assuming we are supporting rfc4106 64-bit extended */ @@ -968,7 +968,7 @@ static int helper_rfc4106_decrypt(struct aead_request *req) u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); u8 authTag[16]; struct scatter_walk src_sg_walk; - struct scatter_walk dst_sg_walk; + struct scatter_walk dst_sg_walk = {}; unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 5f845eef9a4d..81195cca7eae 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -8,8 +8,12 @@ #define PCI_DEVICE_ID_INTEL_HSW_IMC 0x0c00 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC 0x0a04 #define PCI_DEVICE_ID_INTEL_BDW_IMC 0x1604 -#define PCI_DEVICE_ID_INTEL_SKL_IMC 0x191f -#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_U_IMC 0x1904 +#define PCI_DEVICE_ID_INTEL_SKL_Y_IMC 0x190c +#define PCI_DEVICE_ID_INTEL_SKL_HD_IMC 0x1900 +#define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 +#define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f +#define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -616,13 +620,29 @@ static const struct pci_device_id bdw_uncore_pci_ids[] = { static const struct pci_device_id skl_uncore_pci_ids[] = { { /* IMC */ - PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_IMC), + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_Y_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, { /* IMC */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_U_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_HQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -666,8 +686,12 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core Processor */ IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver), /* 4th Gen Core ULT Mobile Processor */ IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver), /* 5th Gen Core U */ - IMC_DEV(SKL_IMC, &skl_uncore_pci_driver), /* 6th Gen Core */ + IMC_DEV(SKL_Y_IMC, &skl_uncore_pci_driver), /* 6th Gen Core Y */ IMC_DEV(SKL_U_IMC, &skl_uncore_pci_driver), /* 6th Gen Core U */ + IMC_DEV(SKL_HD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Dual Core */ + IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ + IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ + IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ { /* end marker */ } }; diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 5b6753d1f7f4..49da9f497b90 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -17,6 +17,7 @@ extern int intel_mid_pci_init(void); extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state); +extern pci_power_t intel_mid_pci_get_power_state(struct pci_dev *pdev); extern void intel_mid_pwr_power_off(void); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4b20f7304b9c..bdde80731f49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -948,7 +948,6 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -958,8 +957,6 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index c7364bd633e1..51287cd90bf6 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1042,8 +1042,11 @@ static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) if (apm_info.get_power_status_broken) return APM_32_UNSUPPORTED; - if (apm_bios_call(&call)) + if (apm_bios_call(&call)) { + if (!call.err) + return APM_NO_ERROR; return call.err; + } *status = call.ebx; *bat = call.ecx; if (apm_info.get_power_status_swabinminutes) { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index b81fe2d63e15..1e81a37c034e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -347,7 +347,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) #ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); - unsigned int socket_id, core_complex_id; bits = c->x86_coreid_bits; /* Low order bits define the core id (index of core in socket) */ @@ -365,10 +364,7 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) if (c->x86 != 0x17 || !cpuid_edx(0x80000006)) return; - socket_id = (c->apicid >> bits) - 1; - core_complex_id = (c->apicid & ((1 << bits) - 1)) >> 3; - - per_cpu(cpu_llc_id, cpu) = (socket_id << 3) | core_complex_id; + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bd910a7dd0a..cc9e980c68ec 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -979,6 +979,35 @@ static void x86_init_cache_qos(struct cpuinfo_x86 *c) } /* + * The physical to logical package id mapping is initialized from the + * acpi/mptables information. Make sure that CPUID actually agrees with + * that. + */ +static void sanitize_package_id(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_SMP + unsigned int pkg, apicid, cpu = smp_processor_id(); + + apicid = apic->cpu_present_to_apicid(cpu); + pkg = apicid >> boot_cpu_data.x86_coreid_bits; + + if (apicid != c->initial_apicid) { + pr_err(FW_BUG "CPU%u: APIC id mismatch. Firmware: %x CPUID: %x\n", + cpu, apicid, c->initial_apicid); + c->initial_apicid = apicid; + } + if (pkg != c->phys_proc_id) { + pr_err(FW_BUG "CPU%u: Using firmware package id %u instead of %u\n", + cpu, pkg, c->phys_proc_id); + c->phys_proc_id = pkg; + } + c->logical_proc_id = topology_phys_to_logical_pkg(pkg); +#else + c->logical_proc_id = 0; +#endif +} + +/* * This does the hard work of actually picking apart the CPU stuff... */ static void identify_cpu(struct cpuinfo_x86 *c) @@ -1103,8 +1132,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) #ifdef CONFIG_NUMA numa_add_cpu(smp_processor_id()); #endif - /* The boot/hotplug time assigment got cleared, restore it */ - c->logical_proc_id = topology_phys_to_logical_pkg(c->phys_proc_id); + sanitize_package_id(c); } /* diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 9b7cf5c28f5f..85f854b98a9d 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -112,7 +112,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, for (; stack < stack_info.end; stack++) { unsigned long real_addr; int reliable = 0; - unsigned long addr = *stack; + unsigned long addr = READ_ONCE_NOCHECK(*stack); unsigned long *ret_addr_p = unwind_get_return_address_ptr(&state); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7d8e2628e82c..e4e97a5355ce 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -461,14 +461,14 @@ void fpu__clear(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ - if (!static_cpu_has(X86_FEATURE_FPU)) { - /* FPU state will be reallocated lazily at the first use. */ - fpu__drop(fpu); - } else { - if (!fpu->fpstate_active) { - fpu__activate_curr(fpu); - user_fpu_begin(); - } + fpu__drop(fpu); + + /* + * Make sure fpstate is cleared and initialized. + */ + if (static_cpu_has(X86_FEATURE_FPU)) { + fpu__activate_curr(fpu); + user_fpu_begin(); copy_init_fpstate_to_fpregs(); } } diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index b6b2f0264af3..2dabea46f039 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -665,14 +665,17 @@ __PAGE_ALIGNED_BSS initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -ENTRY(initial_page_table) +.globl initial_page_table +initial_page_table: .fill 1024,4,0 #endif initial_pg_fixmap: .fill 1024,4,0 -ENTRY(empty_zero_page) +.globl empty_zero_page +empty_zero_page: .fill 4096,1,0 -ENTRY(swapper_pg_dir) +.globl swapper_pg_dir +swapper_pg_dir: .fill 1024,4,0 EXPORT_SYMBOL(empty_zero_page) diff --git a/arch/x86/kernel/sysfb_simplefb.c b/arch/x86/kernel/sysfb_simplefb.c index 764a29f84de7..85195d447a92 100644 --- a/arch/x86/kernel/sysfb_simplefb.c +++ b/arch/x86/kernel/sysfb_simplefb.c @@ -66,13 +66,36 @@ __init int create_simplefb(const struct screen_info *si, { struct platform_device *pd; struct resource res; - unsigned long len; + u64 base, size; + u32 length; - /* don't use lfb_size as it may contain the whole VMEM instead of only - * the part that is occupied by the framebuffer */ - len = mode->height * mode->stride; - len = PAGE_ALIGN(len); - if (len > (u64)si->lfb_size << 16) { + /* + * If the 64BIT_BASE capability is set, ext_lfb_base will contain the + * upper half of the base address. Assemble the address, then make sure + * it is valid and we can actually access it. + */ + base = si->lfb_base; + if (si->capabilities & VIDEO_CAPABILITY_64BIT_BASE) + base |= (u64)si->ext_lfb_base << 32; + if (!base || (u64)(resource_size_t)base != base) { + printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); + return -EINVAL; + } + + /* + * Don't use lfb_size as IORESOURCE size, since it may contain the + * entire VMEM, and thus require huge mappings. Use just the part we + * need, that is, the part where the framebuffer is located. But verify + * that it does not exceed the advertised VMEM. + * Note that in case of VBE, the lfb_size is shifted by 16 bits for + * historical reasons. + */ + size = si->lfb_size; + if (si->orig_video_isVGA == VIDEO_TYPE_VLFB) + size <<= 16; + length = mode->height * mode->stride; + length = PAGE_ALIGN(length); + if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); return -EINVAL; } @@ -81,8 +104,8 @@ __init int create_simplefb(const struct screen_info *si, memset(&res, 0, sizeof(res)); res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res.name = simplefb_resname; - res.start = si->lfb_base; - res.end = si->lfb_base + len - 1; + res.start = base; + res.end = res.start + length - 1; if (res.end <= res.start) return -EINVAL; diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index 2d721e533cf4..b80e8bf43cc6 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -7,11 +7,13 @@ unsigned long unwind_get_return_address(struct unwind_state *state) { + unsigned long addr = READ_ONCE_NOCHECK(*state->sp); + if (unwind_done(state)) return 0; return ftrace_graph_ret_addr(state->task, &state->graph_idx, - *state->sp, state->sp); + addr, state->sp); } EXPORT_SYMBOL_GPL(unwind_get_return_address); @@ -23,8 +25,10 @@ bool unwind_next_frame(struct unwind_state *state) return false; do { + unsigned long addr = READ_ONCE_NOCHECK(*state->sp); + for (state->sp++; state->sp < info->end; state->sp++) - if (__kernel_text_address(*state->sp)) + if (__kernel_text_address(addr)) return true; state->sp = info->next_sp; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e95d3eb2955..cbd7b92585bb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5045,7 +5045,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); - if (ctxt->rip_relative) + if (ctxt->rip_relative && likely(ctxt->memopp)) ctxt->memopp->addr.mem.ea = address_mask(ctxt, ctxt->memopp->addr.mem.ea + ctxt->_eip); diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 25810b144b58..4da03030d5a7 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, struct kvm_lapic_irq irq; int r; - if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) - return -EWOULDBLOCK; + switch (e->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(e, kvm, irq_source_id, level, + line_status); - if (kvm_msi_route_invalid(kvm, e)) - return -EINVAL; + case KVM_IRQ_ROUTING_MSI: + if (kvm_msi_route_invalid(kvm, e)) + return -EINVAL; - kvm_set_msi_irq(kvm, e, &irq); + kvm_set_msi_irq(kvm, e, &irq); - if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) - return r; - else - return -EWOULDBLOCK; + if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) + return r; + break; + + default: + break; + } + + return -EWOULDBLOCK; } int kvm_request_irq_source_id(struct kvm *kvm) @@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } -static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, int level, - bool line_status) -{ - if (!level) - return -1; - - return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); -} - int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) @@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, srcu_read_unlock(&kvm->irq_srcu, idx); } -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status) -{ - switch (irq->type) { - case KVM_IRQ_ROUTING_HV_SINT: - return kvm_hv_set_sint(irq, kvm, irq_source_id, level, - line_status); - default: - return -EWOULDBLOCK; - } -} - void kvm_arch_irq_routing_update(struct kvm *kvm) { kvm_hv_irq_routing_update(kvm); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8157a36ab09..8ca1eca5038d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.tsc_offset += adjustment; - if (is_guest_mode(vcpu)) - svm->nested.hsave->control.tsc_offset += adjustment; - else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - svm->vmcb->control.tsc_offset - adjustment, - svm->vmcb->control.tsc_offset); - - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); -} - static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; @@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + host_tsc; -} - static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, - .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 531c44633190..3980da515fd0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -187,6 +187,7 @@ struct vmcs { */ struct loaded_vmcs { struct vmcs *vmcs; + struct vmcs *shadow_vmcs; int cpu; int launched; struct list_head loaded_vmcss_on_cpu_link; @@ -411,7 +412,6 @@ struct nested_vmx { * memory during VMXOFF, VMCLEAR, VMPTRLD. */ struct vmcs12 *cached_vmcs12; - struct vmcs *current_shadow_vmcs; /* * Indicates if the shadow vmcs must be updated with the * data hold by vmcs12 @@ -421,7 +421,6 @@ struct nested_vmx { /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; - u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; @@ -1419,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs) static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); loaded_vmcs->cpu = -1; loaded_vmcs->launched = 0; } @@ -2599,20 +2600,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu) } /* - * Like guest_read_tsc, but always returns L1's notion of the timestamp - * counter, even if a nested guest (L2) is currently running. - */ -static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - u64 tsc_offset; - - tsc_offset = is_guest_mode(vcpu) ? - to_vmx(vcpu)->nested.vmcs01_tsc_offset : - vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; -} - -/* * writes 'offset' into guest's timestamp counter offset register */ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) @@ -2625,7 +2612,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ struct vmcs12 *vmcs12; - to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; /* recalculate vmcs02.TSC_OFFSET: */ vmcs12 = get_vmcs12(vcpu); vmcs_write64(TSC_OFFSET, offset + @@ -2638,19 +2624,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - u64 offset = vmcs_read64(TSC_OFFSET); - - vmcs_write64(TSC_OFFSET, offset + adjustment); - if (is_guest_mode(vcpu)) { - /* Even when running L2, the adjustment needs to apply to L1 */ - to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; - } else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, - offset + adjustment); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -3556,6 +3529,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static void free_kvm_area(void) @@ -6692,6 +6666,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); + item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; @@ -7068,7 +7043,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ vmcs_clear(shadow_vmcs); - vmx->nested.current_shadow_vmcs = shadow_vmcs; + vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); @@ -7170,8 +7145,11 @@ static void free_nested(struct vcpu_vmx *vmx) free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } - if (enable_shadow_vmcs) - free_vmcs(vmx->nested.current_shadow_vmcs); + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); + vmx->vmcs01.shadow_vmcs = NULL; + } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { @@ -7348,7 +7326,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) int i; unsigned long field; u64 field_value; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; const unsigned long *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; @@ -7397,7 +7375,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) int i, q; unsigned long field; u64 field_value = 0; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; vmcs_load(shadow_vmcs); @@ -7587,7 +7565,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, - __pa(vmx->nested.current_shadow_vmcs)); + __pa(vmx->vmcs01.shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; } } @@ -7655,7 +7633,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -7718,7 +7696,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -9152,6 +9130,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); + vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) goto free_msrs; if (!vmm_exclusive) @@ -10057,9 +10036,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, - vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); + vcpu->arch.tsc_offset + vmcs12->tsc_offset); else - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -10289,8 +10268,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -10814,7 +10791,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (vmx->hv_deadline_tsc == -1) vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); @@ -11335,8 +11312,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, - .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cfe6a75b5358..56900f3e7bcf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn) struct kvm_shared_msrs *locals = container_of(urn, struct kvm_shared_msrs, urn); struct kvm_shared_msr_values *values; + unsigned long flags; + /* + * Disabling irqs at this point since the following code could be + * interrupted and executed through kvm_arch_hardware_disable() + */ + local_irq_save(flags); + if (locals->registered) { + locals->registered = false; + user_return_notifier_unregister(urn); + } + local_irq_restore(flags); for (slot = 0; slot < shared_msrs_global.nr; ++slot) { values = &locals->values[slot]; if (values->host != values->curr) { @@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn) values->curr = values->host; } } - locals->registered = false; - user_r |