summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 09:36:49 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-24 09:36:49 -0700
commit4e241557fc1cb560bd9e77ca1b4a9352732a5427 (patch)
treeda4dbe5e5b3a8792daf9ed7e6bd320c56c86d252 /arch/x86
parent08d183e3c1f650b4db1d07d764502116861542fa (diff)
parentf2ae45edbca7ba5324eef01719ede0151dc5cead (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first batch of KVM updates from Paolo Bonzini: "The bulk of the changes here is for x86. And for once it's not for silicon that no one owns: these are really new features for everyone. Details: - ARM: several features are in progress but missed the 4.2 deadline. So here is just a smattering of bug fixes, plus enabling the VFIO integration. - s390: Some fixes/refactorings/optimizations, plus support for 2GB pages. - x86: * host and guest support for marking kvmclock as a stable scheduler clock. * support for write combining. * support for system management mode, needed for secure boot in guests. * a bunch of cleanups required for the above * support for virtualized performance counters on AMD * legacy PCI device assignment is deprecated and defaults to "n" in Kconfig; VFIO replaces it On top of this there are also bug fixes and eager FPU context loading for FPU-heavy guests. - Common code: Support for multiple address spaces; for now it is used only for x86 SMM but the s390 folks also have plans" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits) KVM: s390: clear floating interrupt bitmap and parameters KVM: x86/vPMU: Enable PMU handling for AMD PERFCTRn and EVNTSELn MSRs KVM: x86/vPMU: Implement AMD vPMU code for KVM KVM: x86/vPMU: Define kvm_pmu_ops to support vPMU function dispatch KVM: x86/vPMU: introduce kvm_pmu_msr_idx_to_pmc KVM: x86/vPMU: reorder PMU functions KVM: x86/vPMU: whitespace and stylistic adjustments in PMU code KVM: x86/vPMU: use the new macros to go between PMC, PMU and VCPU KVM: x86/vPMU: introduce pmu.h header KVM: x86/vPMU: rename a few PMU functions KVM: MTRR: do not map huge page for non-consistent range KVM: MTRR: simplify kvm_mtrr_get_guest_memory_type KVM: MTRR: introduce mtrr_for_each_mem_type KVM: MTRR: introduce fixed_mtrr_addr_* functions KVM: MTRR: sort variable MTRRs KVM: MTRR: introduce var_mtrr_range KVM: MTRR: introduce fixed_mtrr_segment table KVM: MTRR: improve kvm_mtrr_get_guest_memory_type KVM: MTRR: do not split 64 bits MSR content KVM: MTRR: clean up mtrr default type ...
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/include/asm/kvm_emulate.h9
-rw-r--r--arch/x86/include/asm/kvm_host.h92
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm.h14
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/kvmclock.c14
-rw-r--r--arch/x86/kvm/Kconfig9
-rw-r--r--arch/x86/kvm/Makefile6
-rw-r--r--arch/x86/kvm/cpuid.c13
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c303
-rw-r--r--arch/x86/kvm/ioapic.c9
-rw-r--r--arch/x86/kvm/irq_comm.c14
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h5
-rw-r--r--arch/x86/kvm/lapic.c59
-rw-r--r--arch/x86/kvm/lapic.h15
-rw-r--r--arch/x86/kvm/mmu.c678
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/mmu_audit.c20
-rw-r--r--arch/x86/kvm/mtrr.c699
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/pmu.c553
-rw-r--r--arch/x86/kvm/pmu.h118
-rw-r--r--arch/x86/kvm/pmu_amd.c207
-rw-r--r--arch/x86/kvm/pmu_intel.c358
-rw-r--r--arch/x86/kvm/svm.c116
-rw-r--r--arch/x86/kvm/trace.h22
-rw-r--r--arch/x86/kvm/vmx.c363
-rw-r--r--arch/x86/kvm/x86.c890
-rw-r--r--arch/x86/kvm/x86.h8
31 files changed, 3305 insertions, 1323 deletions
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 57a9d94fe160..e16466ec473c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -193,6 +193,8 @@ struct x86_emulate_ops {
int (*cpl)(struct x86_emulate_ctxt *ctxt);
int (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
+ u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
+ void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
@@ -262,6 +264,11 @@ enum x86emul_mode {
X86EMUL_MODE_PROT64, /* 64-bit (long) mode. */
};
+/* These match some of the HF_* flags defined in kvm_host.h */
+#define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+#define X86EMUL_SMM_MASK (1 << 6)
+#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7)
+
struct x86_emulate_ctxt {
const struct x86_emulate_ops *ops;
@@ -273,8 +280,8 @@ struct x86_emulate_ctxt {
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
+ int emul_flags;
- bool guest_mode; /* guest running a nested guest */
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f8c0ec3a4a97..c7fa57b529d2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -184,23 +184,12 @@ struct kvm_mmu_memory_cache {
void *objects[KVM_NR_MEM_OBJS];
};
-/*
- * kvm_mmu_page_role, below, is defined as:
- *
- * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
- * bits 4:7 - page table level for this shadow (1-4)
- * bits 8:9 - page table quadrant for 2-level guests
- * bit 16 - direct mapping of virtual to physical mapping at gfn
- * used for real mode and two-dimensional paging
- * bits 17:19 - common access permissions for all ptes in this shadow page
- */
union kvm_mmu_page_role {
unsigned word;
struct {
unsigned level:4;
unsigned cr4_pae:1;
unsigned quadrant:2;
- unsigned pad_for_nice_hex_output:6;
unsigned direct:1;
unsigned access:3;
unsigned invalid:1;
@@ -208,6 +197,15 @@ union kvm_mmu_page_role {
unsigned cr0_wp:1;
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
+ unsigned :8;
+
+ /*
+ * This is left at the top of the word so that
+ * kvm_memslots_for_spte_role can extract it with a
+ * simple shift. While there is room, give it a whole
+ * byte so it is also faster to load it from memory.
+ */
+ unsigned smm:8;
};
};
@@ -338,12 +336,28 @@ struct kvm_pmu {
u64 reprogram_pmi;
};
+struct kvm_pmu_ops;
+
enum {
KVM_DEBUGREG_BP_ENABLED = 1,
KVM_DEBUGREG_WONT_EXIT = 2,
KVM_DEBUGREG_RELOAD = 4,
};
+struct kvm_mtrr_range {
+ u64 base;
+ u64 mask;
+ struct list_head node;
+};
+
+struct kvm_mtrr {
+ struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
+ mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+ u64 deftype;
+
+ struct list_head head;
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -368,6 +382,7 @@ struct kvm_vcpu_arch {
int32_t apic_arb_prio;
int mp_state;
u64 ia32_misc_enable_msr;
+ u64 smbase;
bool tpr_access_reporting;
u64 ia32_xss;
@@ -471,8 +486,9 @@ struct kvm_vcpu_arch {
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
bool nmi_injected; /* Trying to inject an NMI this entry */
+ bool smi_pending; /* SMI queued after currently running handler */
- struct mtrr_state_type mtrr_state;
+ struct kvm_mtrr mtrr_state;
u64 pat;
unsigned switch_db_regs;
@@ -637,6 +653,8 @@ struct kvm_arch {
#endif
bool boot_vcpu_runs_old_kvmclock;
+
+ u64 disabled_quirks;
};
struct kvm_vm_stat {
@@ -689,12 +707,13 @@ struct msr_data {
struct kvm_lapic_irq {
u32 vector;
- u32 delivery_mode;
- u32 dest_mode;
- u32 level;
- u32 trig_mode;
+ u16 delivery_mode;
+ u16 dest_mode;
+ bool level;
+ u16 trig_mode;
u32 shorthand;
u32 dest_id;
+ bool msi_redir_hint;
};
struct kvm_x86_ops {
@@ -706,19 +725,20 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
+ bool (*cpu_has_high_real_mode_segbase)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
- void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+ void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
void (*vcpu_put)(struct kvm_vcpu *vcpu);
void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
- int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
+ int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
@@ -836,6 +856,8 @@ struct kvm_x86_ops {
void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t offset, unsigned long mask);
+ /* pmu operations of sub-arch */
+ const struct kvm_pmu_ops *pmu_ops;
};
struct kvm_arch_async_pf {
@@ -871,7 +893,7 @@ void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
+ const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -882,7 +904,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
-void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
+void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, struct kvm_memslots *slots);
unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
@@ -890,7 +912,6 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3);
int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
-u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
struct kvm_irq_mask_notifier {
void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
@@ -938,7 +959,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
+int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
@@ -967,7 +988,7 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
+int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
@@ -1110,6 +1131,14 @@ enum {
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
#define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK (1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK (1 << 7)
+
+#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#define KVM_ADDRESS_SPACE_NUM 2
+
+#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
/*
* Hardware virtualization extension instructions may fault if a
@@ -1144,7 +1173,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
-void kvm_vcpu_reset(struct kvm_vcpu *vcpu);
+void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address);
@@ -1168,16 +1197,9 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
-void kvm_pmu_init(struct kvm_vcpu *vcpu);
-void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
-void kvm_pmu_reset(struct kvm_vcpu *vcpu);
-void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu);
-bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr);
-int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
-int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc);
-int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-void kvm_handle_pmu_event(struct kvm_vcpu *vcpu);
-void kvm_deliver_pmi(struct kvm_vcpu *vcpu);
+int __x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem);
+int x86_set_memory_region(struct kvm *kvm,
+ const struct kvm_userspace_memory_region *mem);
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 6167fd798188..655e07a48f6c 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,5 +41,6 @@ struct pvclock_wall_clock {
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define PVCLOCK_GUEST_STOPPED (1 << 1)
+#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..628954ceede1 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -86,7 +86,6 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
offset = pvclock_get_nsec_offset(src);
ret = src->system_time + offset;
ret_flags = src->flags;
- rdtsc_barrier();
*cycles = ret;
*flags = ret_flags;
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d7dcef58aefa..a4ae82eb82aa 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,8 @@ struct kvm_ioapic_state {
#define KVM_IRQCHIP_IOAPIC 2
#define KVM_NR_IRQCHIPS 3
+#define KVM_RUN_X86_SMM (1 << 0)
+
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
@@ -281,6 +283,7 @@ struct kvm_reinject_control {
#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
+#define KVM_VCPUEVENT_VALID_SMM 0x00000008
/* Interrupt shadow states */
#define KVM_X86_SHADOW_INT_MOV_SS 0x01
@@ -309,7 +312,13 @@ struct kvm_vcpu_events {
} nmi;
__u32 sipi_vector;
__u32 flags;
- __u32 reserved[10];
+ struct {
+ __u8 smm;
+ __u8 pending;
+ __u8 smm_inside_nmi;
+ __u8 latched_init;
+ } smi;
+ __u32 reserved[9];
};
/* for KVM_GET/SET_DEBUGREGS */
@@ -345,4 +354,7 @@ struct kvm_xcrs {
struct kvm_sync_regs {
};
+#define KVM_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_QUIRK_CD_NW_CLEARED (1 << 1)
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1681504e44a4..47190bd399e7 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -331,7 +331,7 @@ static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
apic_write(APIC_EOI, APIC_EOI_ACK);
}
-void kvm_guest_cpu_init(void)
+static void kvm_guest_cpu_init(void)
{
if (!kvm_para_available())
return;
@@ -688,7 +688,7 @@ static inline void spin_time_accum_blocked(u64 start)
static struct dentry *d_spin_debug;
static struct dentry *d_kvm_debug;
-struct dentry *kvm_init_debugfs(void)
+static struct dentry *kvm_init_debugfs(void)
{
d_kvm_debug = debugfs_create_dir("kvm-guest", NULL);
if (!d_kvm_debug)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 42caaef897c8..49487b488061 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -24,6 +24,7 @@
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/memblock.h>
+#include <linux/sched.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -217,8 +218,10 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
+ struct pvclock_vcpu_time_info *vcpu_time;
unsigned long mem;
- int size;
+ int size, cpu;
+ u8 flags;
size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
@@ -264,7 +267,14 @@ void __init kvmclock_init(void)
pv_info.name = "KVM";
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
- pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
+ pvclock_set_flags(~0);
+
+ cpu = get_cpu();
+ vcpu_time = &hv_clock[cpu].pvti;
+ flags = pvclock_read_flags(vcpu_time);
+ if (flags & PVCLOCK_COUNTS_FROM_ZERO)
+ set_sched_clock_stable();
+ put_cpu();
}
int __init kvm_setup_vsyscall_timeinfo(void)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 413a7bf9efbb..d8a1d56276e1 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,15 +86,16 @@ config KVM_MMU_AUDIT
auditing of KVM MMU events at runtime.
config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support"
+ bool "KVM legacy PCI device assignment support (DEPRECATED)"
depends on KVM && PCI && IOMMU_API
- default y
+ default n
---help---
Provide support for legacy PCI device assignment through KVM. The
kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes much of this support.
+ framework through VFIO, which supersedes this support and provides
+ better security.
- If unsure, say Y.
+ If unsure, say N.
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 16e8f962eaad..67d215cb8953 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,10 +12,10 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
- i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+ i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o
kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
-kvm-intel-y += vmx.o
-kvm-amd-y += svm.o
+kvm-intel-y += vmx.o pmu_intel.o
+kvm-amd-y += svm.o pmu_amd.o
obj-$(CONFIG_KVM) += kvm.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9f705e618af5..64dd46793099 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,12 +16,14 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
+#include <asm/fpu/internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h>
#include <asm/fpu/xstate.h>
#include "cpuid.h"
#include "lapic.h"
#include "mmu.h"
#include "trace.h"
+#include "pmu.h"
static u32 xstate_required_size(u64 xstate_bv, bool compacted)
{
@@ -95,7 +97,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+ vcpu->arch.eager_fpu = use_eager_fpu() || guest_cpuid_has_mpx(vcpu);
/*
* The existing code assumes virtual address is 48-bit in the canonical
@@ -109,7 +111,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- kvm_pmu_cpuid_update(vcpu);
+ kvm_pmu_refresh(vcpu);
return 0;
}
@@ -413,6 +415,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
break;
}
+ case 6: /* Thermal management */
+ entry->eax = 0x4; /* allow ARAT */
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ break;
case 7: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* Mask ebx against host capability word 9 */
@@ -589,7 +597,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
break;
case 3: /* Processor serial number */
case 5: /* MONITOR/MWAIT */
- case 6: /* Thermal management */
case 0xC0000002:
case 0xC0000003:
case 0xC0000004:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 496b3695d3d3..dd05b9cef6ae 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -70,6 +70,14 @@ static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_FSGSBASE));
}
+static inline bool guest_cpuid_has_longmode(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
+ return best && (best->edx & bit(X86_FEATURE_LM));
+}
+
static inline bool guest_cpuid_has_osvw(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 630bcb0d7a04..e7a4fde5d631 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include <linux/stringify.h>
+#include <asm/debugreg.h>
#include "x86.h"
#include "tss.h"
@@ -523,13 +524,9 @@ static void masked_increment(ulong *reg, ulong mask, int inc)
static inline void
register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
{
- ulong mask;
+ ulong *preg = reg_rmw(ctxt, reg);
- if (ctxt->ad_bytes == sizeof(unsigned long))
- mask = ~0UL;
- else
- mask = ad_mask(ctxt);
- masked_increment(reg_rmw(ctxt, reg), mask, inc);
+ assign_register(preg, *preg + inc, ctxt->ad_bytes);
}
static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -2262,6 +2259,260 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
return rc;
}
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax, ebx, ecx, edx;
+
+ eax = 0x80000001;
+ ecx = 0;
+ ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
+ return edx & bit(X86_FEATURE_LM);
+}
+
+#define GET_SMSTATE(type, smbase, offset) \
+ ({ \
+ type __val; \
+ int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \
+ sizeof(__val), NULL); \
+ if (r != X86EMUL_CONTINUE) \
+ return X86EMUL_UNHANDLEABLE; \
+ __val; \
+ })
+
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+ desc->g = (flags >> 23) & 1;
+ desc->d = (flags >> 22) & 1;
+ desc->l = (flags >> 21) & 1;
+ desc->avl = (flags >> 20) & 1;
+ desc->p = (flags >> 15) & 1;
+ desc->dpl = (flags >> 13) & 3;
+ desc->s = (flags >> 12) & 1;
+ desc->type = (flags >> 8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fa8 + n * 4);
+
+ if (n < 3)
+ offset = 0x7f84 + n * 12;
+ else
+ offset = 0x7f2c + (n - 3) * 12;
+
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, offset));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
+{
+ struct desc_struct desc;
+ int offset;
+ u16 selector;
+ u32 base3;
+
+ offset = 0x7e00 + n * 16;
+
+ selector = GET_SMSTATE(u16, smbase, offset);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smbase, offset + 2) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, offset + 4));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, offset + 8));
+ base3 = GET_SMSTATE(u32, smbase, offset + 12);
+
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+ u64 cr0, u64 cr4)
+{
+ int bad;
+
+ /*
+ * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+ * Then enable protected mode. However, PCID cannot be enabled
+ * if EFER.LMA=0, so set it separately.
+ */
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+
+ if (cr4 & X86_CR4_PCIDE) {
+ bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+ if (bad)
+ return X86EMUL_UNHANDLEABLE;
+ }
+
+ return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u16 selector;
+ u32 val, cr0, cr4;
+ int i;
+
+ cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
+ ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
+ ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
+ ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
+
+ for (i = 0; i < 8; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u32, smbase, 0x7fd0 + i * 4);
+
+ val = GET_SMSTATE(u32, smbase, 0x7fcc);
+ ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+ val = GET_SMSTATE(u32, smbase, 0x7fc8);
+ ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fc4);
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f64));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f60));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f5c));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7fc0);
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7f80));
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7f7c));
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7f78));
+ ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+ dt.address = GET_SMSTATE(u32, smbase, 0x7f74);
+ dt.size = GET_SMSTATE(u32, smbase, 0x7f70);
+ ctxt->ops->set_gdt(ctxt, &dt);
+
+ dt.address = GET_SMSTATE(u32, smbase, 0x7f58);
+ dt.size = GET_SMSTATE(u32, smbase, 0x7f54);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ for (i = 0; i < 6; i++) {
+ int r = rsm_load_seg_32(ctxt, smbase, i);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+ }
+
+ cr4 = GET_SMSTATE(u32, smbase, 0x7f14);
+
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
+
+ return rsm_enter_protected_mode(ctxt, cr0, cr4);
+}
+
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
+{
+ struct desc_struct desc;
+ struct desc_ptr dt;
+ u64 val, cr0, cr4;
+ u32 base3;
+ u16 selector;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ *reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
+
+ ctxt->_eip = GET_SMSTATE(u64, smbase, 0x7f78);
+ ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7f70) | X86_EFLAGS_FIXED;
+
+ val = GET_SMSTATE(u32, smbase, 0x7f68);
+ ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1);
+ val = GET_SMSTATE(u32, smbase, 0x7f60);
+ ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
+
+ cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
+ ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
+ cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
+ ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
+ val = GET_SMSTATE(u64, smbase, 0x7ed0);
+ ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7e90);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e92) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e94));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e98));
+ base3 = GET_SMSTATE(u32, smbase, 0x7e9c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+ dt.size = GET_SMSTATE(u32, smbase, 0x7e84);
+ dt.address = GET_SMSTATE(u64, smbase, 0x7e88);
+ ctxt->ops->set_idt(ctxt, &dt);
+
+ selector = GET_SMSTATE(u32, smbase, 0x7e70);
+ rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smbase, 0x7e72) << 8);
+ set_desc_limit(&desc, GET_SMSTATE(u32, smbase, 0x7e74));
+ set_desc_base(&desc, GET_SMSTATE(u32, smbase, 0x7e78));
+ base3 = GET_SMSTATE(u32, smbase, 0x7e7c);
+ ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+ dt.size =