summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 09:49:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 09:49:13 -0700
commitfe38bd6862074c0a2b9be7f31f043aaa70b2af5f (patch)
tree34edf3f546188b108c513b3f8499e45afe37aad9 /arch/x86/kvm
parent404e634fdb96a3c99c7517353bfafbd88e04ab41 (diff)
parentfb3925d06c285e1acb248addc5d80b33ea771b0f (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "s390: - ioctl hardening - selftests ARM: - ITS translation cache - support for 512 vCPUs - various cleanups and bugfixes PPC: - various minor fixes and preparation x86: - bugfixes all over the place (posted interrupts, SVM, emulation corner cases, blocked INIT) - some IPI optimizations" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (75 commits) KVM: X86: Use IPI shorthands in kvm guest when support KVM: x86: Fix INIT signal handling in various CPU states KVM: VMX: Introduce exit reason for receiving INIT signal on guest-mode KVM: VMX: Stop the preemption timer during vCPU reset KVM: LAPIC: Micro optimize IPI latency kvm: Nested KVM MMUs need PAE root too KVM: x86: set ctxt->have_exception in x86_decode_insn() KVM: x86: always stop emulation on page fault KVM: nVMX: trace nested VM-Enter failures detected by H/W KVM: nVMX: add tracepoint for failed nested VM-Enter x86: KVM: svm: Fix a check in nested_svm_vmrun() KVM: x86: Return to userspace with internal error on unexpected exit reason KVM: x86: Add kvm_emulate_{rd,wr}msr() to consolidate VXM/SVM code KVM: x86: Refactor up kvm_{g,s}et_msr() to simplify callers doc: kvm: Fix return description of KVM_SET_MSRS KVM: X86: Tune PLE Window tracepoint KVM: VMX: Change ple_window type to unsigned int KVM: X86: Remove tailing newline for tracepoints KVM: X86: Trace vcpu_id for vmexit KVM: x86: Manually calculate reserved bits when loading PDPTRS ...
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c27
-rw-r--r--arch/x86/kvm/emulate.c27
-rw-r--r--arch/x86/kvm/lapic.c20
-rw-r--r--arch/x86/kvm/mmu.c61
-rw-r--r--arch/x86/kvm/mmu.h2
-rw-r--r--arch/x86/kvm/svm.c198
-rw-r--r--arch/x86/kvm/trace.h74
-rw-r--r--arch/x86/kvm/vmx/nested.c305
-rw-r--r--arch/x86/kvm/vmx/vmenter.S4
-rw-r--r--arch/x86/kvm/vmx/vmx.c94
-rw-r--r--arch/x86/kvm/vmx/vmx.h2
-rw-r--r--arch/x86/kvm/x86.c197
-rw-r--r--arch/x86/kvm/x86.h2
13 files changed, 584 insertions, 429 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 22c2720cd948..dd5985eb61b4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -392,6 +392,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
+ if (boot_cpu_has(X86_FEATURE_IBPB) && boot_cpu_has(X86_FEATURE_IBRS))
+ entry->edx |= F(SPEC_CTRL);
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ entry->edx |= F(INTEL_STIBP);
+ if (boot_cpu_has(X86_FEATURE_SSBD))
+ entry->edx |= F(SPEC_CTRL_SSBD);
/*
* We emulate ARCH_CAPABILITIES in software even
* if the host doesn't support it.
@@ -729,18 +735,23 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
+ entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
+ cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
/*
- * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
- * hardware cpuid
+ * AMD has separate bits for each SPEC_CTRL bit.
+ * arch/x86/kernel/cpu/bugs.c is kind enough to
+ * record that in cpufeatures so use them.
*/
- if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ if (boot_cpu_has(X86_FEATURE_IBPB))
entry->ebx |= F(AMD_IBPB);
- if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ if (boot_cpu_has(X86_FEATURE_IBRS))
entry->ebx |= F(AMD_IBRS);
- if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
- entry->ebx |= F(VIRT_SSBD);
- entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
- cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ entry->ebx |= F(AMD_STIBP);
+ if (boot_cpu_has(X86_FEATURE_SSBD))
+ entry->ebx |= F(AMD_SSBD);
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ entry->ebx |= F(AMD_SSB_NO);
/*
* The preference is to use SPEC CTRL MSR instead of the
* VIRT_SPEC MSR.
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 718f7d9afedc..698efb8c3897 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4156,6 +4156,20 @@ out:
return rc;
}
+static int em_xsetbv(struct x86_emulate_ctxt *ctxt)
+{
+ u32 eax, ecx, edx;
+
+ eax = reg_read(ctxt, VCPU_REGS_RAX);
+ edx = reg_read(ctxt, VCPU_REGS_RDX);
+ ecx = reg_read(ctxt, VCPU_REGS_RCX);
+
+ if (ctxt->ops->set_xcr(ctxt, ecx, ((u64)edx << 32) | eax))
+ return emulate_gp(ctxt, 0);
+
+ return X86EMUL_CONTINUE;
+}
+
static bool valid_cr(int nr)
{
switch (nr) {
@@ -4409,6 +4423,12 @@ static const struct opcode group7_rm1[] = {
N, N, N, N, N, N,
};
+static const struct opcode group7_rm2[] = {
+ N,
+ II(ImplicitOps | Priv, em_xsetbv, xsetbv),
+ N, N, N, N, N, N,
+};
+
static const struct opcode group7_rm3[] = {
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
@@ -4498,7 +4518,8 @@ static const struct group_dual group7 = { {
}, {
EXT(0, group7_rm0),
EXT(0, group7_rm1),
- N, EXT(0, group7_rm3),
+ EXT(0, group7_rm2),
+ EXT(0, group7_rm3),
II(SrcNone | DstMem | Mov, em_smsw, smsw), N,
II(SrcMem16 | Mov | Priv, em_lmsw, lmsw),
EXT(0, group7_rm7),
@@ -5144,7 +5165,7 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len)
else {
rc = __do_insn_fetch_bytes(ctxt, 1);
if (rc != X86EMUL_CONTINUE)
- return rc;
+ goto done;
}
switch (mode) {
@@ -5395,6 +5416,8 @@ done_prefixes:
ctxt->memopp->addr.mem.ea + ctxt->_eip);
done:
+ if (rc == X86EMUL_PROPAGATE_FAULT)
+ ctxt->have_exception = true;
return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2a4f278f3b56..8675458c2205 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1198,10 +1198,8 @@ void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
}
EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
-static void apic_send_ipi(struct kvm_lapic *apic)
+static void apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
{
- u32 icr_low = kvm_lapic_get_reg(apic, APIC_ICR);
- u32 icr_high = kvm_lapic_get_reg(apic, APIC_ICR2);
struct kvm_lapic_irq irq;
irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -1914,8 +1912,9 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
}
case APIC_ICR:
/* No delay here, so we always clear the pending bit */
- kvm_lapic_set_reg(apic, APIC_ICR, val & ~(1 << 12));
- apic_send_ipi(apic);
+ val &= ~(1 << 12);
+ apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
+ kvm_lapic_set_reg(apic, APIC_ICR, val);
break;
case APIC_ICR2:
@@ -2707,11 +2706,14 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
return;
/*
- * INITs are latched while in SMM. Because an SMM CPU cannot
- * be in KVM_MP_STATE_INIT_RECEIVED state, just eat SIPIs
- * and delay processing of INIT until the next RSM.
+ * INITs are latched while CPU is in specific states
+ * (SMM, VMX non-root mode, SVM with GIF=0).
+ * Because a CPU cannot be in these states immediately
+ * after it has processed an INIT signal (and thus in
+ * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
+ * and leave the INIT pending.
*/
- if (is_smm(vcpu)) {
+ if (is_smm(vcpu) || kvm_x86_ops->apic_init_signal_blocked(vcpu)) {
WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
if (test_bit(KVM_APIC_SIPI, &apic->pending_events))
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a63964e7cec7..a10af9c87f8a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -214,6 +214,7 @@ static u64 __read_mostly shadow_accessed_mask;
static u64 __read_mostly shadow_dirty_mask;
static u64 __read_mostly shadow_mmio_mask;
static u64 __read_mostly shadow_mmio_value;
+static u64 __read_mostly shadow_mmio_access_mask;
static u64 __read_mostly shadow_present_mask;
static u64 __read_mostly shadow_me_mask;
@@ -299,14 +300,21 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
kvm_flush_remote_tlbs_with_range(kvm, &range);
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
{
+ BUG_ON((u64)(unsigned)access_mask != access_mask);
BUG_ON((mmio_mask & mmio_value) != mmio_value);
shadow_mmio_value = mmio_value | SPTE_SPECIAL_MASK;
shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
+ shadow_mmio_access_mask = access_mask;
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
+static bool is_mmio_spte(u64 spte)
+{
+ return (spte & shadow_mmio_mask) == shadow_mmio_value;
+}
+
static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
{
return sp->role.ad_disabled;
@@ -314,19 +322,19 @@ static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
static inline bool spte_ad_enabled(u64 spte)
{
- MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ MMU_WARN_ON(is_mmio_spte(spte));
return !(spte & shadow_acc_track_value);
}
static inline u64 spte_shadow_accessed_mask(u64 spte)
{
- MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ MMU_WARN_ON(is_mmio_spte(spte));
return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
}
static inline u64 spte_shadow_dirty_mask(u64 spte)
{
- MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value);
+ MMU_WARN_ON(is_mmio_spte(spte));
return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
}
@@ -389,7 +397,7 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
u64 mask = generation_mmio_spte_mask(gen);
u64 gpa = gfn << PAGE_SHIFT;
- access &= ACC_WRITE_MASK | ACC_USER_MASK;
+ access &= shadow_mmio_access_mask;
mask |= shadow_mmio_value | access;
mask |= gpa | shadow_nonpresent_or_rsvd_mask;
mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
@@ -401,11 +409,6 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
mmu_spte_set(sptep, mask);
}
-static bool is_mmio_spte(u64 spte)
-{
- return (spte & shadow_mmio_mask) == shadow_mmio_value;
-}
-
static gfn_t get_mmio_spte_gfn(u64 spte)
{
u64 gpa = spte & shadow_nonpresent_or_rsvd_lower_gfn_mask;
@@ -418,8 +421,7 @@ static gfn_t get_mmio_spte_gfn(u64 spte)
static unsigned get_mmio_spte_access(u64 spte)
{
- u64 mask = generation_mmio_spte_mask(MMIO_SPTE_GEN_MASK) | shadow_mmio_mask;
- return (spte & ~mask) & ~PAGE_MASK;
+ return spte & shadow_mmio_access_mask;
}
static bool set_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn,
@@ -3302,7 +3304,8 @@ static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
}
if (unlikely(is_noslot_pfn(pfn)))
- vcpu_cache_mmio_info(vcpu, gva, gfn, access);
+ vcpu_cache_mmio_info(vcpu, gva, gfn,
+ access & shadow_mmio_access_mask);
return false;
}
@@ -5611,13 +5614,13 @@ slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
PT_PAGE_TABLE_LEVEL, lock_flush_tlb);
}
-static void free_mmu_pages(struct kvm_vcpu *vcpu)
+static void free_mmu_pages(struct kvm_mmu *mmu)
{
- free_page((unsigned long)vcpu->arch.mmu->pae_root);
- free_page((unsigned long)vcpu->arch.mmu->lm_root);
+ free_page((unsigned long)mmu->pae_root);
+ free_page((unsigned long)mmu->lm_root);
}
-static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
+static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
{
struct page *page;
int i;
@@ -5638,9 +5641,9 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
if (!page)
return -ENOMEM;
- vcpu->arch.mmu->pae_root = page_address(page);
+ mmu->pae_root = page_address(page);
for (i = 0; i < 4; ++i)
- vcpu->arch.mmu->pae_root[i] = INVALID_PAGE;
+ mmu->pae_root[i] = INVALID_PAGE;
return 0;
}
@@ -5648,6 +5651,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
int kvm_mmu_create(struct kvm_vcpu *vcpu)
{
uint i;
+ int ret;
vcpu->arch.mmu = &vcpu->arch.root_mmu;
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
@@ -5665,7 +5669,19 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID;
vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
- return alloc_mmu_pages(vcpu);
+
+ ret = alloc_mmu_pages(vcpu, &vcpu->arch.guest_mmu);
+ if (ret)
+ return ret;
+
+ ret = alloc_mmu_pages(vcpu, &vcpu->arch.root_mmu);
+ if (ret)
+ goto fail_allocate_root;
+
+ return ret;
+ fail_allocate_root:
+ free_mmu_pages(&vcpu->arch.guest_mmu);
+ return ret;
}
@@ -6094,7 +6110,7 @@ static void kvm_set_mmio_spte_mask(void)
if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
mask &= ~1ull;
- kvm_mmu_set_mmio_spte_mask(mask, mask);
+ kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
}
int kvm_mmu_module_init(void)
@@ -6168,7 +6184,8 @@ unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm)
void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_mmu_unload(vcpu);
- free_mmu_pages(vcpu);
+ free_mmu_pages(&vcpu->arch.root_mmu);
+ free_mmu_pages(&vcpu->arch.guest_mmu);
mmu_free_memory_caches(vcpu);
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 54c2a377795b..11f8ec89433b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e)
return ((1ULL << (e - s + 1)) - 1) << s;
}
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
void
reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e0368076a1ef..04fe21849b6e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -68,10 +68,8 @@ MODULE_DEVICE_TABLE(x86cpu, svm_cpu_id);
#define SEG_TYPE_LDT 2
#define SEG_TYPE_BUSY_TSS16 3
-#define SVM_FEATURE_NPT (1 << 0)
#define SVM_FEATURE_LBRV (1 << 1)
#define SVM_FEATURE_SVML (1 << 2)
-#define SVM_FEATURE_NRIP (1 << 3)
#define SVM_FEATURE_TSC_RATE (1 << 4)
#define SVM_FEATURE_VMCB_CLEAN (1 << 5)
#define SVM_FEATURE_FLUSH_ASID (1 << 6)
@@ -770,7 +768,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
}
-static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
+static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -779,18 +777,17 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
svm->next_rip = svm->vmcb->control.next_rip;
}
- if (!svm->next_rip) {
- if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
- EMULATE_DONE)
- printk(KERN_DEBUG "%s: NOP\n", __func__);
- return;
- }
+ if (!svm->next_rip)
+ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP);
+
if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
__func__, kvm_rip_read(vcpu), svm->next_rip);
kvm_rip_write(vcpu, svm->next_rip);
svm_set_interrupt_shadow(vcpu, 0);
+
+ return EMULATE_DONE;
}
static void svm_queue_exception(struct kvm_vcpu *vcpu)
@@ -821,7 +818,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
* raises a fault that is not intercepted. Still better than
* failing in all cases.
*/
- skip_emulated_instruction(&svm->vcpu);
+ (void)skip_emulated_instruction(&svm->vcpu);
rip = kvm_rip_read(&svm->vcpu);
svm->int3_rip = rip + svm->vmcb->save.cs.base;
svm->int3_injected = rip - old_rip;
@@ -1269,11 +1266,11 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count_grow,
pause_filter_count_max);
- if (control->pause_filter_count != old)
+ if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-
- trace_kvm_ple_window_grow(vcpu->vcpu_id,
- control->pause_filter_count, old);
+ trace_kvm_ple_window_update(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+ }
}
static void shrink_ple_window(struct kvm_vcpu *vcpu)
@@ -1287,11 +1284,11 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
pause_filter_count,
pause_filter_count_shrink,
pause_filter_count);
- if (control->pause_filter_count != old)
+ if (control->pause_filter_count != old) {
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
-
- trace_kvm_ple_window_shrink(vcpu->vcpu_id,
- control->pause_filter_count, old);
+ trace_kvm_ple_window_update(vcpu->vcpu_id,
+ control->pause_filter_count, old);
+ }
}
static __init int svm_hardware_setup(void)
@@ -2136,6 +2133,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
struct page *nested_msrpm_pages;
int err;
+ BUILD_BUG_ON_MSG(offsetof(struct vcpu_svm, vcpu) != 0,
+ "struct kvm_vcpu must be at offset 0 for arch usercopy region");
+
svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
if (!svm) {
err = -ENOMEM;
@@ -2903,13 +2903,11 @@ static int nop_on_interception(struct vcpu_svm *svm)
static int halt_interception(struct vcpu_svm *svm)
{
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
return kvm_emulate_halt(&svm->vcpu);
}
static int vmmcall_interception(struct vcpu_svm *svm)
{
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_emulate_hypercall(&svm->vcpu);
}
@@ -3588,9 +3586,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
mark_all_dirty(svm->vmcb);
}
-static bool nested_svm_vmrun(struct vcpu_svm *svm)
+static int nested_svm_vmrun(struct vcpu_svm *svm)
{
- int rc;
+ int ret;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
@@ -3599,13 +3597,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
vmcb_gpa = svm->vmcb->save.rax;
- rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
- if (rc) {
- if (rc == -EINVAL)
- kvm_inject_gp(&svm->vcpu, 0);
- return false;
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
+ if (ret == -EINVAL) {
+ kvm_inject_gp(&svm->vcpu, 0);
+ return 1;
+ } else if (ret) {
+ return kvm_skip_emulated_instruction(&svm->vcpu);
}
+ ret = kvm_skip_emulated_instruction(&svm->vcpu);
+
nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
@@ -3616,7 +3617,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
kvm_vcpu_unmap(&svm->vcpu, &map, true);
- return false;
+ return ret;
}
trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
@@ -3660,7 +3661,16 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
- return true;
+ if (!nested_svm_vmrun_msrpm(svm)) {
+ svm->vmcb->control.exit_code = SVM_EXIT_ERR;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = 0;
+ svm->vmcb->control.exit_info_2 = 0;
+
+ nested_svm_vmexit(svm);
+ }
+
+ return ret;
}
static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
@@ -3697,7 +3707,6 @@ static int vmload_interception(struct vcpu_svm *svm)
nested_vmcb = map.hva;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
@@ -3724,7 +3733,6 @@ static int vmsave_interception(struct vcpu_svm *svm)
nested_vmcb = map.hva;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
@@ -3738,27 +3746,7 @@ static int vmrun_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- /* Save rip after vmrun instruction */
- kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
-
- if (!nested_svm_vmrun(svm))
- return 1;
-
- if (!nested_svm_vmrun_msrpm(svm))
- goto failed;
-
- return 1;
-
-failed:
-
- svm->vmcb->control.exit_code = SVM_EXIT_ERR;
- svm->vmcb->control.exit_code_hi = 0;
- svm->vmcb->control.exit_info_1 = 0;
- svm->vmcb->control.exit_info_2 = 0;
-
- nested_svm_vmexit(svm);
-
- return 1;
+ return nested_svm_vmrun(svm);
}
static int stgi_interception(struct vcpu_svm *svm)
@@ -3775,7 +3763,6 @@ static int stgi_interception(struct vcpu_svm *svm)
if (vgif_enabled(svm))
clr_intercept(svm, INTERCEPT_STGI);
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
@@ -3791,7 +3778,6 @@ static int clgi_interception(struct vcpu_svm *svm)
if (nested_svm_check_permissions(svm))
return 1;
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
disable_gif(svm);
@@ -3816,7 +3802,6 @@ static int invlpga_interception(struct vcpu_svm *svm)
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -3839,7 +3824,6 @@ static int xsetbv_interception(struct vcpu_svm *svm)
u32 index = kvm_rcx_read(&svm->vcpu);
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -3898,25 +3882,29 @@ static int task_switch_interception(struct vcpu_svm *svm)
if (reason != TASK_SWITCH_GATE ||
int_type == SVM_EXITINTINFO_TYPE_SOFT ||
(int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
- (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
- skip_emulated_instruction(&svm->vcpu);
+ (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) {
+ if (skip_emulated_instruction(&svm->vcpu) != EMULATE_DONE)
+ goto fail;
+ }
if (int_type != SVM_EXITINTINFO_TYPE_SOFT)
int_vec = -1;
if (kvm_task_switch(&svm->vcpu, tss_selector, int_vec, reason,
- has_error_code, error_code) == EMULATE_FAIL) {
- svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- svm->vcpu.run->internal.ndata = 0;
- return 0;
- }
+ has_error_code, error_code) == EMULATE_FAIL)
+ goto fail;
+
return 1;
+
+fail:
+ svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ svm->vcpu.run->internal.ndata = 0;
+ return 0;
}
static int cpuid_interception(struct vcpu_svm *svm)
{
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_emulate_cpuid(&svm->vcpu);
}
@@ -4232,23 +4220,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int rdmsr_interception(struct vcpu_svm *svm)
{
- u32 ecx = kvm_rcx_read(&svm->vcpu);
- struct msr_data msr_info;
-
- msr_info.index = ecx;
- msr_info.host_initiated = false;
- if (svm_get_msr(&svm->vcpu, &msr_info)) {
- trace_kvm_msr_read_ex(ecx);
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- } else {
- trace_kvm_msr_read(ecx, msr_info.data);
-
- kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
- kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- return kvm_skip_emulated_instruction(&svm->vcpu);
- }
+ return kvm_emulate_rdmsr(&svm->vcpu);
}
static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -4438,23 +4410,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
- struct msr_data msr;
- u32 ecx = kvm_rcx_read(&svm->vcpu);
- u64 data = kvm_read_edx_eax(&svm->vcpu);
-
- msr.data = data;
- msr.index = ecx;
- msr.host_initiated = false;
-
- svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- if (kvm_set_msr(&svm->vcpu, &msr)) {
- trace_kvm_msr_write_ex(ecx, data);
- kvm_inject_gp(&svm->vcpu, 0);
- return 1;
- } else {
- trace_kvm_msr_write(ecx, data);
- return kvm_skip_emulated_instruction(&svm->vcpu);
- }
+ return kvm_emulate_wrmsr(&svm->vcpu);
}
static int msr_interception(struct vcpu_svm *svm)
@@ -5025,9 +4981,14 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
+ vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code);
+ dump_vmcb(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror =
+ KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+ vcpu->run->internal.ndata = 1;
+ vcpu->run->internal.data[0] = exit_code;
+ return 0;
}
return svm_exit_handlers[exit_code](svm);
@@ -5274,7 +5235,8 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
kvm_set_msi_irq(kvm, e, &irq);
- if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
+ !kvm_irq_is_postable(&irq)) {
pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
__func__, irq.vector);
return -1;
@@ -5328,6 +5290,7 @@ static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
* 1. When cannot target interrupt to a specific vcpu.
* 2. Unsetting posted interrupt.
* 3. APIC virtialization is disabled for the vcpu.
+ * 4. IRQ has incompatible delivery mode (SMI, INIT, etc)
*/
if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
kvm_vcpu_apicv_active(&svm->vcpu)) {
@@ -5933,6 +5896,8 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu)
guest_cpuid_clear(vcpu, X86_FEATURE_X2APIC);
}
+#define F(x) bit(X86_FEATURE_##x)
+
static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
{
switch (func) {
@@ -5944,6 +5909,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
if (nested)
entry->ecx |= (1 << 2); /* Set SVM bit */
break;
+ case 0x80000008:
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
+ break;
case 0x8000000A:
entry->eax = 1; /* SVM revision 1 */
entry->ebx = 8; /* Lets support 8 ASIDs in case we add proper
@@ -5954,11 +5924,11 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
/* Support next_rip if host supports it */
if (boot_cpu_has(X86_FEATURE_NRIPS))
- entry->edx |= SVM_FEATURE_NRIP;
+ entry->edx |= F(NRIPS);
/* Support NPT for the guest if enabled */
if (npt_enabled)
- entry->edx |= SVM_FEATURE_NPT;
+ entry->edx |= F(NPT);
break;
case 0x8000001F:
@@ -6067,6 +6037,7 @@ static const struct __x86_intercept {
[x86_intercept_ins] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_out] = POST_EX(SVM_EXIT_IOIO),
[x86_intercept_outs] = POST_EX(SVM_EXIT_IOIO),
+ [x86_intercept_xsetbv] = PRE_EX(SVM_EXIT_XSETBV),
};
#undef PRE_EX
@@ -7193,6 +7164,21 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
return false;
}
+static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /*
+ * TODO: Last condition latch INIT signals on vCPU when
+ * vCPU is in guest-mode and vmcb12 defines intercept on INIT.
+ * To properly emulate the INIT intercept, SVM should implement
+ * kvm_x86_ops->check_nested_events() and call nested_svm_vmexit()
+ * there if an INIT signal is pending.
+ */
+ return !gif_set(svm) ||
+ (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT));
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -7329,6 +7315,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.nested_get_evmcs_version = NULL,
.need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
+
+ .apic_init_signal_blocked = svm_apic_init_signal_blocked,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index b5c831e79094..7c741a0c5f80 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -232,17 +232,20 @@ TRACE_EVENT(kvm_exit,
__field( u32, isa )
__field( u64, info1 )
__field( u64, info2 )
+ __field( unsigned int, vcpu_id )
),
TP_fast_assign(
__entry->exit_reason = exit_reason;
__entry->guest_rip = kvm_rip_read(vcpu);
__entry->isa = isa;
+ __entry->vcpu_id = vcpu->vcpu_id;
kvm_x86_ops->get_exit_info(vcpu, &__entry->info1,
&__entry->info2);
),
- TP_printk("reason %s rip 0x%lx info %llx %llx",
+ TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx",
+ __entry->vcpu_id,
(__entry->isa == KVM_ISA_VMX) ?
__print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) :
__print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS),
@@ -887,36 +890,27 @@ TRACE_EVENT(kvm_pml_full,
TP_printk("vcpu %d: PML full", __entry->vcpu_id)
);
-TRACE_EVENT(kvm_ple_window,
- TP_PROTO(bool grow, unsigned int vcpu_id, int new, int old),
- TP_ARGS(grow, vcpu_id, new, old),
+TRACE_EVENT(kvm_ple_window_update,
+ TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old),
+ TP_ARGS(vcpu_id, new, old),
TP_STRUCT__entry(
- __field( bool, grow )
__field( unsigned int, vcpu_id )
- __field( int, new )
- __field( int, old )
+ __field( unsigned int, new )
+ __field( unsigned int, old )
),
TP_fast_assign(
- __entry->grow = grow;
__entry->vcpu_id = vcpu_id;
__entry->new = new;
__entry->old = old;
),
- TP_printk("vcpu %u: ple_window %d (%s %d)",
- __entry->vcpu_id,
- __entry->new,
- __entry->grow ? "grow" : "shrink",
- __entry->old)
+ TP_printk("vcpu %u old %u new %u (%s)",
+ __entry->vcpu_id, __entry->old, __entry->new,
+ __entry->old < __entry->new ? "growed" : "shrinked")
);
-#define trace_kvm_ple_window_grow(vcpu_id, new, old) \
- trace_kvm_ple_window(true, vcpu_id, new, old)
-#define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
- trace_kvm_ple_window(false, vcpu_id, new, old)
-
TRACE_EVENT(kvm_pvclock_update,
TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
TP_ARGS(vcpu_id, pvclock),
@@ -1320,7 +1314,7 @@ TRACE_EVENT(kvm_avic_incomplete_ipi,
__entry->index = index;
),
- TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u\n",
+ TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u",
__entry->vcpu, __entry->icrh, __entry->icrl,
__entry->id, __entry->index)
);
@@ -1345,7 +1339,7 @@ TRACE_EVENT(kvm_avic_unaccelerated_access,
__entry->vec = vec;
),
- TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x\n",
+ TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x",
__entry->vcpu,
__entry->offset,
__print_symbolic(__entry->offset, kvm_trace_symbol_apic),
@@ -1462,6 +1456,46 @@ TRACE_EVENT(kvm_hv_send_ipi_ex,
__entry->vector, __entry->format,
__entry->valid_bank_mask)
);
+
+TRACE_EVENT(kvm_pv_tlb_flush,
+ TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb),
+ TP_ARGS(vcpu_id, need_flush_tlb),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( bool, need_flush_tlb )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->need_flush_tlb = need_flush_tlb;
+ ),
+
+ TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id,
+ __entry->need_flush_tlb ? "true" : "false")
+);
+
+/*
+ * Tracepoint for failed nested VMX VM-Enter.
+ */
+TRACE_EVENT(kvm_nested_vmenter_failed,
+ TP_PROTO(const char *msg, u32 err),
+ TP_ARGS(msg, err),
+
+ TP_STRUCT__entry(
+ __field(const char *, msg)
+ __field(u32, err)
+ ),
+