author     Michael S. Tsirkin <mst@redhat.com>      2017-04-21 12:27:17 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>      2017-04-21 12:50:28 +0200
commit     668fffa3f838edfcb1679f842f7ef1afa61c3e9a (patch)
tree       edf2ba4b34dfc5f3757d20e94c598829bbb48e77 /arch/x86/kvm
parent     db2336a80489e7c3c7728cefd9be58fac5ecfb39 (diff)
kvm: better MWAIT emulation for guests
Guests that are heavy on futexes end up IPI'ing each other a lot. That can lead to significant slowdowns and increased latency for those guests when running within KVM. If only a single guest is needed on a host, we have a lot of spare host CPU time we can throw at the problem. Modern CPUs implement a feature called "MWAIT" which allows guests to wake up sleeping remote CPUs without an IPI - thus without an exit - at the expense of never going out of guest context.

The decision whether this is something sensible to use should be up to the VM admin, so to user space. We can however allow MWAIT execution on systems that support it properly hardware-wise.

This patch adds a CAP to user space and a KVM cpuid leaf to indicate availability of native MWAIT execution. With that enabled, the worst a guest can do is waste as many cycles as a "jmp ." would, so it's not a privilege problem.

We consciously do *not* expose the feature in our CPUID bitmap, as most people will want to benefit from sleeping vCPUs to allow for overcommit.

Reported-by: "Gabriel L. Somlo" <gsomlo@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
[agraf: fix amd, change commit message]
Signed-off-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
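For context on the mechanism the message describes: the sleeping CPU arms a hardware monitor on a cache line with MONITOR and parks in MWAIT; any other CPU then wakes it with an ordinary store to that line - no IPI and, with this patch, no VM exit. A minimal sketch of the guest-kernel pattern (illustrative only, not part of this patch; wake_flag and the helper names are made up, and MONITOR/MWAIT require CPL0):

    #include <stdint.h>

    static volatile uint64_t wake_flag;	/* the monitored cache line */

    static inline void cpu_monitor(const volatile void *addr)
    {
    	/* MONITOR: rax = address to watch, ecx = extensions, edx = hints */
    	asm volatile("monitor" :: "a"(addr), "c"(0UL), "d"(0UL));
    }

    static inline void cpu_mwait(void)
    {
    	/* MWAIT: eax = hints, ecx = extensions */
    	asm volatile("mwait" :: "a"(0UL), "c"(0UL));
    }

    /* Sleeper: doze until wake_flag is set, re-checking after arming
     * the monitor so a wakeup between check and MWAIT is not lost. */
    static void wait_for_wakeup(void)
    {
    	while (!wake_flag) {
    		cpu_monitor(&wake_flag);
    		if (!wake_flag)
    			cpu_mwait();
    	}
    }

    /* Waker: a plain store to the monitored line is enough. */
    static void wake_remote(void)
    {
    	wake_flag = 1;
    }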
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--  arch/x86/kvm/svm.c   7
-rw-r--r--  arch/x86/kvm/vmx.c   6
-rw-r--r--  arch/x86/kvm/x86.c   3
-rw-r--r--  arch/x86/kvm/x86.h  36
4 files changed, 48 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 1b203abf76e1..c41f03e5090a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1198,10 +1198,13 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_CLGI);
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
- set_intercept(svm, INTERCEPT_MONITOR);
- set_intercept(svm, INTERCEPT_MWAIT);
set_intercept(svm, INTERCEPT_XSETBV);
+ if (!kvm_mwait_in_guest()) {
+ set_intercept(svm, INTERCEPT_MONITOR);
+ set_intercept(svm, INTERCEPT_MWAIT);
+ }
+
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __pa(svm->msrpm);
control->int_ctl = V_INTR_MASKING_MASK;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c1a12b94e1fd..a4ef63718101 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3527,11 +3527,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
- CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
+ if (!kvm_mwait_in_guest())
+ min |= CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING;
+
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
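In setup_vmcs_config(), bits in "min" are execution controls the host CPU must support or VMX setup fails, while "opt" bits are enabled only when available; moving MWAIT/MONITOR exiting out of "min" behind kvm_mwait_in_guest() therefore makes those exits conditional rather than mandatory. A simplified sketch of how such a min/opt pair is resolved against the allowed-0/allowed-1 settings reported in the corresponding VMX capability MSR (an approximation of the kernel's adjust_vmx_controls(), not the exact code):

    #include <errno.h>
    #include <stdint.h>

    static int adjust_controls(uint32_t min, uint32_t opt,
    			   uint64_t vmx_cap_msr, uint32_t *result)
    {
    	uint32_t ctl = min | opt;

    	ctl &= (uint32_t)(vmx_cap_msr >> 32);	/* drop bits the CPU cannot set */
    	ctl |= (uint32_t)vmx_cap_msr;		/* force bits the CPU requires */

    	if ((ctl & min) != min)			/* a mandatory control is missing */
    		return -EIO;
    	*result = ctl;
    	return 0;
    }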
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 49a69c0a0d50..2f9fe6bf7091 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2687,6 +2687,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
+ case KVM_CAP_X86_GUEST_MWAIT:
+ r = kvm_mwait_in_guest();
+ break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
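User space probes the new capability with KVM_CHECK_EXTENSION on the /dev/kvm fd. A minimal sketch (assumes kernel headers new enough to define KVM_CAP_X86_GUEST_MWAIT; error handling omitted):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
    	int kvm = open("/dev/kvm", O_RDWR);
    	/* r > 0 means the guest can execute MWAIT natively on this host */
    	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_X86_GUEST_MWAIT);

    	printf("native guest MWAIT: %s\n", r > 0 ? "available" : "unavailable");
    	return 0;
    }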
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e8ff3e4ce38a..612067074905 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -1,6 +1,8 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
+#include <asm/processor.h>
+#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
@@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
+static inline bool kvm_mwait_in_guest(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
+ return false;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /* All AMD CPUs have a working MWAIT implementation */
+ return true;
+ case X86_VENDOR_INTEL:
+ /* Handle Intel below */
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
+ * they would allow guest to stop the CPU completely by disabling
+ * interrupts then invoking MWAIT.
+ */
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return false;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ return false;
+
+ return true;
+}
+
#endif
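The Intel branch of kvm_mwait_in_guest() above keys off CPUID leaf 5 (CPUID_MWAIT_LEAF): ECX bit 1, CPUID5_ECX_INTERRUPT_BREAK (0x2) in asm/mwait.h, advertises that an interrupt breaks MWAIT even when interrupts are disabled, which is what stops a guest from halting a CPU for good. The same check can be reproduced from user space for sanity testing; a small sketch using GCC's <cpuid.h> (illustrative, not part of the patch):

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
    	unsigned int eax, ebx, ecx, edx;

    	/* __get_cpuid() returns 0 if leaf 5 exceeds the CPU's max level */
    	if (!__get_cpuid(5, &eax, &ebx, &ecx, &edx)) {
    		puts("no CPUID leaf 5: MONITOR/MWAIT not enumerated");
    		return 1;
    	}
    	/* ECX bit 1 == CPUID5_ECX_INTERRUPT_BREAK */
    	printf("break-on-interrupt: %s\n", (ecx & 0x2) ? "yes" : "no");
    	return 0;
    }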