summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-10-23 11:17:56 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-10-23 11:17:56 -0700
commitf9a705ad1c077ec2872c641f0db9c0d5b4a097bb (patch)
tree7f5d18d74f700be5bcf72ec5f4955f016eac9ab9 /arch/x86
parent9313f8026328d0309d093f6774be4b8f5340c0e5 (diff)
parent29cf0f5007a215b51feb0ae25ca5353480d53ead (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "For x86, there is a new alternative and (in the future) more scalable implementation of extended page tables that does not need a reverse map from guest physical addresses to host physical addresses. For now it is disabled by default because it is still lacking a few of the existing MMU's bells and whistles. However it is a very solid piece of work and it is already available for people to hammer on it. Other updates: ARM: - New page table code for both hypervisor and guest stage-2 - Introduction of a new EL2-private host context - Allow EL2 to have its own private per-CPU variables - Support of PMU event filtering - Complete rework of the Spectre mitigation PPC: - Fix for running nested guests with in-kernel IRQ chip - Fix race condition causing occasional host hard lockup - Minor cleanups and bugfixes x86: - allow trapping unknown MSRs to userspace - allow userspace to force #GP on specific MSRs - INVPCID support on AMD - nested AMD cleanup, on demand allocation of nested SVM state - hide PV MSRs and hypercalls for features not enabled in CPUID - new test for MSR_IA32_TSC writes from host and guest - cleanups: MMU, CPUID, shared MSRs - LAPIC latency optimizations ad bugfixes" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (232 commits) kvm: x86/mmu: NX largepage recovery for TDP MMU kvm: x86/mmu: Don't clear write flooding count for direct roots kvm: x86/mmu: Support MMIO in the TDP MMU kvm: x86/mmu: Support write protection for nesting in tdp MMU kvm: x86/mmu: Support disabling dirty logging for the tdp MMU kvm: x86/mmu: Support dirty logging for the TDP MMU kvm: x86/mmu: Support changed pte notifier in tdp MMU kvm: x86/mmu: Add access tracking for tdp_mmu kvm: x86/mmu: Support invalidate range MMU notifier for TDP MMU kvm: x86/mmu: Allocate struct kvm_mmu_pages for all pages in TDP MMU kvm: x86/mmu: Add TDP MMU PF handler kvm: x86/mmu: Remove disallowed_hugepage_adjust shadow_walk_iterator arg kvm: x86/mmu: Support zapping SPTEs in the TDP MMU KVM: Cache as_id in kvm_memory_slot kvm: x86/mmu: Add functions to handle changed TDP SPTEs kvm: x86/mmu: Allocate and free TDP MMU roots kvm: x86/mmu: Init / Uninit the TDP MMU kvm: x86/mmu: Introduce tdp_iter KVM: mmu: extract spte.h and spte.c KVM: mmu: Separate updating a PTE from kvm_set_pte_rmapp ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_host.h70
-rw-r--r--arch/x86/include/asm/svm.h90
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h20
-rw-r--r--arch/x86/include/uapi/asm/svm.h2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile6
-rw-r--r--arch/x86/kvm/cpuid.c137
-rw-r--r--arch/x86/kvm/cpuid.h15
-rw-r--r--arch/x86/kvm/emulate.c22
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h2
-rw-r--r--arch/x86/kvm/lapic.c43
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/mmu/mmu.c974
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h88
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h21
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h50
-rw-r--r--arch/x86/kvm/mmu/spte.c318
-rw-r--r--arch/x86/kvm/mmu/spte.h252
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.c182
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h60
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c1157
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h48
-rw-r--r--arch/x86/kvm/svm/avic.c7
-rw-r--r--arch/x86/kvm/svm/nested.c351
-rw-r--r--arch/x86/kvm/svm/sev.c26
-rw-r--r--arch/x86/kvm/svm/svm.c404
-rw-r--r--arch/x86/kvm/svm/svm.h103
-rw-r--r--arch/x86/kvm/trace.h128
-rw-r--r--arch/x86/kvm/vmx/capabilities.h10
-rw-r--r--arch/x86/kvm/vmx/nested.c178
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c332
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h99
-rw-r--r--arch/x86/kvm/vmx/vmcs.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S34
-rw-r--r--arch/x86/kvm/vmx/vmx.c1259
-rw-r--r--arch/x86/kvm/vmx/vmx.h143
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h (renamed from arch/x86/kvm/vmx/ops.h)0
-rw-r--r--arch/x86/kvm/x86.c694
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/mm/fault.c13
44 files changed, 5013 insertions, 2359 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5303dbc5c9bc..d44858b69353 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -80,13 +80,14 @@
#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23)
-#define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24)
+#define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
+#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -132,7 +133,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
-#define KVM_MAX_CPUID_ENTRIES 80
+#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
@@ -636,7 +637,7 @@ struct kvm_vcpu_arch {
int halt_request; /* real mode on Intel only */
int cpuid_nent;
- struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+ struct kvm_cpuid_entry2 *cpuid_entries;
int maxphyaddr;
int max_tdp_level;
@@ -788,6 +789,21 @@ struct kvm_vcpu_arch {
/* AMD MSRC001_0015 Hardware Configuration */
u64 msr_hwcr;
+
+ /* pv related cpuid info */
+ struct {
+ /*
+ * value of the eax register in the KVM_CPUID_FEATURES CPUID
+ * leaf.
+ */
+ u32 features;
+
+ /*
+ * indicates whether pv emulation should be disabled if features
+ * are not present in the guest's cpuid
+ */
+ bool enforce;
+ } pv_cpuid;
};
struct kvm_lpage_info {
@@ -860,6 +876,13 @@ struct kvm_hv {
struct kvm_hv_syndbg hv_syndbg;
};
+struct msr_bitmap_range {
+ u32 flags;
+ u32 nmsrs;
+ u32 base;
+ unsigned long *bitmap;
+};
+
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
@@ -961,8 +984,31 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+ /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
+ u32 user_space_msr_mask;
+
+ struct {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+ } msr_filter;
+
struct kvm_pmu_event_filter *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
+
+ /*
+ * Whether the TDP MMU is enabled for this VM. This contains a
+ * snapshot of the TDP MMU module parameter from when the VM was
+ * created and remains unchanged for the life of the VM. If this is
+ * true, TDP MMU handler functions will run for various MMU
+ * operations.
+ */
+ bool tdp_mmu_enabled;
+
+ /* List of struct tdp_mmu_pages being used as roots */
+ struct list_head tdp_mmu_roots;
+ /* List of struct tdp_mmu_pages not being used as roots */
+ struct list_head tdp_mmu_pages;
};
struct kvm_vm_stat {
@@ -1069,7 +1115,7 @@ struct kvm_x86_ops {
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
- void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+ int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
@@ -1143,7 +1189,12 @@ struct kvm_x86_ops {
/* Returns actual tsc_offset set in active VMCS */
u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
+ /*
+ * Retrieve somewhat arbitrary exit information. Intended to be used
+ * only from within tracepoints to avoid VMREADs when tracing is off.
+ */
+ void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2,
+ u32 *exit_int_info, u32 *exit_int_info_err_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@@ -1221,12 +1272,13 @@ struct kvm_x86_ops {
int (*get_msr_feature)(struct kvm_msr_entry *entry);
- bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
+ bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
void (*migrate_timers)(struct kvm_vcpu *vcpu);
+ void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
@@ -1238,7 +1290,7 @@ struct kvm_x86_nested_ops {
int (*set_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
struct kvm_nested_state *kvm_state);
- bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu);
+ bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
int (*enable_evmcs)(struct kvm_vcpu *vcpu,
@@ -1612,8 +1664,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
-void kvm_define_shared_msr(unsigned index, u32 msr);
-int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
+void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cf13f9e78585..71d630bb5e08 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -3,10 +3,54 @@
#define __SVM_H
#include <uapi/asm/svm.h>
-
+#include <uapi/asm/kvm.h>
+
+/*
+ * 32-bit intercept words in the VMCB Control Area, starting
+ * at Byte offset 000h.
+ */
+
+enum intercept_words {
+ INTERCEPT_CR = 0,
+ INTERCEPT_DR,
+ INTERCEPT_EXCEPTION,
+ INTERCEPT_WORD3,
+ INTERCEPT_WORD4,
+ INTERCEPT_WORD5,
+ MAX_INTERCEPT,
+};
enum {
- INTERCEPT_INTR,
+ /* Byte offset 000h (word 0) */
+ INTERCEPT_CR0_READ = 0,
+ INTERCEPT_CR3_READ = 3,
+ INTERCEPT_CR4_READ = 4,
+ INTERCEPT_CR8_READ = 8,
+ INTERCEPT_CR0_WRITE = 16,
+ INTERCEPT_CR3_WRITE = 16 + 3,
+ INTERCEPT_CR4_WRITE = 16 + 4,
+ INTERCEPT_CR8_WRITE = 16 + 8,
+ /* Byte offset 004h (word 1) */
+ INTERCEPT_DR0_READ = 32,
+ INTERCEPT_DR1_READ,
+ INTERCEPT_DR2_READ,
+ INTERCEPT_DR3_READ,
+ INTERCEPT_DR4_READ,
+ INTERCEPT_DR5_READ,
+ INTERCEPT_DR6_READ,
+ INTERCEPT_DR7_READ,
+ INTERCEPT_DR0_WRITE = 48,
+ INTERCEPT_DR1_WRITE,
+ INTERCEPT_DR2_WRITE,
+ INTERCEPT_DR3_WRITE,
+ INTERCEPT_DR4_WRITE,
+ INTERCEPT_DR5_WRITE,
+ INTERCEPT_DR6_WRITE,
+ INTERCEPT_DR7_WRITE,
+ /* Byte offset 008h (word 2) */
+ INTERCEPT_EXCEPTION_OFFSET = 64,
+ /* Byte offset 00Ch (word 3) */
+ INTERCEPT_INTR = 96,
INTERCEPT_NMI,
INTERCEPT_SMI,
INTERCEPT_INIT,
@@ -38,7 +82,8 @@ enum {
INTERCEPT_TASK_SWITCH,
INTERCEPT_FERR_FREEZE,
INTERCEPT_SHUTDOWN,
- INTERCEPT_VMRUN,
+ /* Byte offset 010h (word 4) */
+ INTERCEPT_VMRUN = 128,
INTERCEPT_VMMCALL,
INTERCEPT_VMLOAD,
INTERCEPT_VMSAVE,
@@ -53,15 +98,18 @@ enum {
INTERCEPT_MWAIT_COND,
INTERCEPT_XSETBV,
INTERCEPT_RDPRU,
+ /* Byte offset 014h (word 5) */
+ INTERCEPT_INVLPGB = 160,
+ INTERCEPT_INVLPGB_ILLEGAL,
+ INTERCEPT_INVPCID,
+ INTERCEPT_MCOMMIT,
+ INTERCEPT_TLBSYNC,
};
struct __attribute__ ((__packed__)) vmcb_control_area {
- u32 intercept_cr;
- u32 intercept_dr;
- u32 intercept_exceptions;
- u64 intercept;
- u8 reserved_1[40];
+ u32 intercepts[MAX_INTERCEPT];
+ u32 reserved_1[15 - MAX_INTERCEPT];
u16 pause_filter_thresh;
u16 pause_filter_count;
u64 iopm_base_pa;
@@ -287,32 +335,6 @@ struct vmcb {
#define SVM_SELECTOR_READ_MASK SVM_SELECTOR_WRITE_MASK
#define SVM_SELECTOR_CODE_MASK (1 << 3)
-#define INTERCEPT_CR0_READ 0
-#define INTERCEPT_CR3_READ 3
-#define INTERCEPT_CR4_READ 4
-#define INTERCEPT_CR8_READ 8
-#define INTERCEPT_CR0_WRITE (16 + 0)
-#define INTERCEPT_CR3_WRITE (16 + 3)
-#define INTERCEPT_CR4_WRITE (16 + 4)
-#define INTERCEPT_CR8_WRITE (16 + 8)
-
-#define INTERCEPT_DR0_READ 0
-#define INTERCEPT_DR1_READ 1
-#define INTERCEPT_DR2_READ 2
-#define INTERCEPT_DR3_READ 3
-#define INTERCEPT_DR4_READ 4
-#define INTERCEPT_DR5_READ 5
-#define INTERCEPT_DR6_READ 6
-#define INTERCEPT_DR7_READ 7
-#define INTERCEPT_DR0_WRITE (16 + 0)
-#define INTERCEPT_DR1_WRITE (16 + 1)
-#define INTERCEPT_DR2_WRITE (16 + 2)
-#define INTERCEPT_DR3_WRITE (16 + 3)
-#define INTERCEPT_DR4_WRITE (16 + 4)
-#define INTERCEPT_DR5_WRITE (16 + 5)
-#define INTERCEPT_DR6_WRITE (16 + 6)
-#define INTERCEPT_DR7_WRITE (16 + 7)
-
#define SVM_EVTINJ_VEC_MASK 0xff
#define SVM_EVTINJ_TYPE_SHIFT 8
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cd7de4b401fe..f8ba5289ecb0 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -52,7 +52,7 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES VMCS_CONTROL_BIT(VIRT_APIC_ACCESSES)
#define SECONDARY_EXEC_ENABLE_EPT VMCS_CONTROL_BIT(EPT)
#define SECONDARY_EXEC_DESC VMCS_CONTROL_BIT(DESC_EXITING)
-#define SECONDARY_EXEC_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
+#define SECONDARY_EXEC_ENABLE_RDTSCP VMCS_CONTROL_BIT(RDTSCP)
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE VMCS_CONTROL_BIT(VIRTUAL_X2APIC)
#define SECONDARY_EXEC_ENABLE_VPID VMCS_CONTROL_BIT(VPID)
#define SECONDARY_EXEC_WBINVD_EXITING VMCS_CONTROL_BIT(WBINVD_EXITING)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 0780f97c1850..89e5f3d1bba8 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -192,6 +192,26 @@ struct kvm_msr_list {
__u32 indices[0];
};
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
+#define KVM_MSR_FILTER_READ (1 << 0)
+#define KVM_MSR_FILTER_WRITE (1 << 1)
+ __u32 flags;
+ __u32 nmsrs; /* number of msrs in bitmap */
+ __u32 base; /* MSR index the bitmap starts at */
+ __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+};
+
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+ __u32 flags;
+ struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
struct kvm_cpuid_entry {
__u32 function;
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index a7a3403645e5..f1d8307454e0 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -77,6 +77,7 @@
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_INVPCID 0x0a2
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -182,6 +183,7 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1c0f2560a41c..7f57ede3cb8e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -975,7 +975,7 @@ void arch_haltpoll_disable(unsigned int cpu)
if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
return;
- /* Enable guest halt poll disables host halt poll */
+ /* Disable guest halt poll enables host halt poll */
smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
}
EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index fbd5bd7a945a..f92dfd8ef10d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -66,6 +66,7 @@ config KVM_WERROR
default y if X86_64 && !KASAN
# We use the dependency on !COMPILE_TEST to not be enabled
# blindly in allmodconfig or allyesconfig configurations
+ depends on KVM
depends on (X86_64 && !KASAN) || !COMPILE_TEST
depends on EXPERT
help
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4a3081e9f4b5..b804444e16d4 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,9 +15,11 @@ kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
- hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o
+ hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
+ mmu/spte.o mmu/tdp_iter.o mmu/tdp_mmu.o
-kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o vmx/evmcs.o vmx/nested.o
+kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
+ vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7456f9ad424b..06a278b3701d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -54,7 +54,24 @@ static u32 xstate_required_size(u64 xstate_bv, bool compacted)
#define F feature_bit
-static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
+static inline struct kvm_cpuid_entry2 *cpuid_entry2_find(
+ struct kvm_cpuid_entry2 *entries, int nent, u32 function, u32 index)
+{
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ for (i = 0; i < nent; i++) {
+ e = &entries[i];
+
+ if (e->function == function && (e->index == index ||
+ !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
+ return e;
+ }
+
+ return NULL;
+}
+
+static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
{
struct kvm_cpuid_entry2 *best;
@@ -62,7 +79,7 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu)
* The existing code assumes virtual address is 48-bit or 57-bit in the
* canonical address checks; exit if it is ever changed.
*/
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ best = cpuid_entry2_find(entries, nent, 0x80000008, 0);
if (best) {
int vaddr_bits = (best->eax & 0xff00) >> 8;
@@ -107,6 +124,13 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+ /*
+ * save the feature bitmap to avoid cpuid lookup for every PV
+ * operation
+ */
+ if (best)
+ vcpu->arch.pv_cpuid.features = best->eax;
+
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best)
@@ -121,8 +145,6 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
- kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
-
best = kvm_find_cpuid_entry(vcpu, 1, 0);
if (best && apic) {
if (cpuid_entry_has(best, X86_FEATURE_TSC_DEADLINE_TIMER))
@@ -146,7 +168,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
vcpu->arch.cr4_guest_rsvd_bits =
__cr4_reserved_bits(guest_cpuid_has, vcpu);
- kvm_x86_ops.update_exception_bitmap(vcpu);
+
+ /* Invoke the vendor callback only after the above state is updated. */
+ kvm_x86_ops.vcpu_after_set_cpuid(vcpu);
}
static int is_efer_nx(void)
@@ -186,7 +210,6 @@ int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
not_found:
return 36;
}
-EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
@@ -194,46 +217,53 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry __user *entries)
{
int r, i;
- struct kvm_cpuid_entry *cpuid_entries = NULL;
+ struct kvm_cpuid_entry *e = NULL;
+ struct kvm_cpuid_entry2 *e2 = NULL;
- r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- goto out;
+ return -E2BIG;
+
if (cpuid->nent) {
- cpuid_entries = vmemdup_user(entries,
- array_size(sizeof(struct kvm_cpuid_entry),
- cpuid->nent));
- if (IS_ERR(cpuid_entries)) {
- r = PTR_ERR(cpuid_entries);
- goto out;
+ e = vmemdup_user(entries, array_size(sizeof(*e), cpuid->nent));
+ if (IS_ERR(e))
+ return PTR_ERR(e);
+
+ e2 = kvmalloc_array(cpuid->nent, sizeof(*e2), GFP_KERNEL_ACCOUNT);
+ if (!e2) {
+ r = -ENOMEM;
+ goto out_free_cpuid;
}
}
for (i = 0; i < cpuid->nent; i++) {
- vcpu->arch.cpuid_entries[i].function = cpuid_entries[i].function;
- vcpu->arch.cpuid_entries[i].eax = cpuid_entries[i].eax;
- vcpu->arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx;
- vcpu->arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx;
- vcpu->arch.cpuid_entries[i].edx = cpuid_entries[i].edx;
- vcpu->arch.cpuid_entries[i].index = 0;
- vcpu->arch.cpuid_entries[i].flags = 0;
- vcpu->arch.cpuid_entries[i].padding[0] = 0;
- vcpu->arch.cpuid_entries[i].padding[1] = 0;
- vcpu->arch.cpuid_entries[i].padding[2] = 0;
+ e2[i].function = e[i].function;
+ e2[i].eax = e[i].eax;
+ e2[i].ebx = e[i].ebx;
+ e2[i].ecx = e[i].ecx;
+ e2[i].edx = e[i].edx;
+ e2[i].index = 0;
+ e2[i].flags = 0;
+ e2[i].padding[0] = 0;
+ e2[i].padding[1] = 0;
+ e2[i].padding[2] = 0;
}
- vcpu->arch.cpuid_nent = cpuid->nent;
- r = kvm_check_cpuid(vcpu);
+
+ r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
- vcpu->arch.cpuid_nent = 0;
- kvfree(cpuid_entries);
- goto out;
+ kvfree(e2);
+ goto out_free_cpuid;
}
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = cpuid->nent;
+
cpuid_fix_nx_cap(vcpu);
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
- kvfree(cpuid_entries);
-out:
+out_free_cpuid:
+ kvfree(e);
+
return r;
}
@@ -241,26 +271,32 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
+ struct kvm_cpuid_entry2 *e2 = NULL;
int r;
- r = -E2BIG;
if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
- goto out;
- r = -EFAULT;
- if (copy_from_user(&vcpu->arch.cpuid_entries, entries,
- cpuid->nent * sizeof(struct kvm_cpuid_entry2)))
- goto out;
- vcpu->arch.cpuid_nent = cpuid->nent;
- r = kvm_check_cpuid(vcpu);
+ return -E2BIG;
+
+ if (cpuid->nent) {
+ e2 = vmemdup_user(entries, array_size(sizeof(*e2), cpuid->nent));
+ if (IS_ERR(e2))
+ return PTR_ERR(e2);
+ }
+
+ r = kvm_check_cpuid(e2, cpuid->nent);
if (r) {
- vcpu->arch.cpuid_nent = 0;
- goto out;
+ kvfree(e2);
+ return r;
}
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = cpuid->nent;
+
kvm_update_cpuid_runtime(vcpu);
kvm_vcpu_after_set_cpuid(vcpu);
-out:
- return r;
+
+ return 0;
}
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -941,17 +977,8 @@ out_free:
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index)
{
- struct kvm_cpuid_entry2 *e;
- int i;
-
- for (i = 0; i < vcpu->arch.cpuid_nent; ++i) {
- e = &vcpu->arch.cpuid_entries[i];
-
- if (e->function == function && (e->index == index ||
- !(e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)))
- return e;
- }
- return NULL;
+ return cpuid_entry2_find(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent,
+ function, index);
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 3a923ae15f2f..bf8577947ed2 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -5,6 +5,7 @@
#include "x86.h"
#include <asm/cpu.h>
#include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
extern u32 kvm_cpu_caps[NCAPINTS] __read_mostly;
void kvm_set_cpu_caps(void);
@@ -34,6 +35,11 @@ static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
return vcpu->arch.maxphyaddr;
}
+static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+ return (gpa >= BIT_ULL(cpuid_maxphyaddr(vcpu)));
+}
+
struct cpuid_reg {
u32 function;
u32 index;
@@ -308,4 +314,13 @@ static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
}
+static __always_inline bool guest_pv_has(struct kvm_vcpu *vcpu,
+ unsigned int kvm_feature)
+{
+ if (!vcpu->arch.pv_cpuid.enforce)
+ return true;
+
+ return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
+}
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2f6510de6b0c..0d917eb70319 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -3606,7 +3606,7 @@ static int em_rdpid(struct x86_emulate_ctxt *ctxt)
u64 tsc_aux = 0;
if (ctxt->ops->get_msr(ctxt, MSR_TSC_AUX, &tsc_aux))
- return emulate_gp(ctxt, 0);
+ return emulate_ud(ctxt);
ctxt->dst.val = tsc_aux;
return X86EMUL_CONTINUE;
}
@@ -3701,21 +3701,35 @@ static int em_dr_write(struct x86_emulate_ctxt *ctxt)
static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
{
+ u64 msr_index = reg_read(ctxt, VCPU_REGS_RCX);
u64 msr_data;
+ int r;
msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
- if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
+ r = ctxt->ops->set_msr(ctxt, msr_index, msr_data);
+
+ if (r == X86EMUL_IO_NEEDED)
+ return r;
+
+ if (