summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/hyperv.c280
-rw-r--r--arch/x86/kvm/hyperv.h4
-rw-r--r--arch/x86/kvm/lapic.c45
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c393
-rw-r--r--arch/x86/kvm/mmu.h13
-rw-r--r--arch/x86/kvm/mmu_audit.c12
-rw-r--r--arch/x86/kvm/paging_tmpl.h15
-rw-r--r--arch/x86/kvm/svm.c64
-rw-r--r--arch/x86/kvm/trace.h42
-rw-r--r--arch/x86/kvm/vmx.c2287
-rw-r--r--arch/x86/kvm/vmx_shadow_fields.h5
-rw-r--r--arch/x86/kvm/x86.c244
-rw-r--r--arch/x86/kvm/x86.h2
14 files changed, 2358 insertions, 1050 deletions
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 01d209ab5481..4e80080f277a 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -36,6 +36,8 @@
#include "trace.h"
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+
static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
{
return atomic64_read(&synic->sint[sint]);
@@ -132,8 +134,10 @@ static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu *vcpu = NULL;
int i;
- if (vpidx < KVM_MAX_VCPUS)
- vcpu = kvm_get_vcpu(kvm, vpidx);
+ if (vpidx >= KVM_MAX_VCPUS)
+ return NULL;
+
+ vcpu = kvm_get_vcpu(kvm, vpidx);
if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
return vcpu;
kvm_for_each_vcpu(i, vcpu, kvm)
@@ -689,6 +693,24 @@ void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
stimer_cleanup(&hv_vcpu->stimer[i]);
}
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
+ return false;
+ return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
+}
+EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
+
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page)
+{
+ if (!kvm_hv_assist_page_enabled(vcpu))
+ return false;
+ return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+ assist_page, sizeof(*assist_page));
+}
+EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
+
static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
{
struct hv_message *msg = &stimer->msg;
@@ -1040,21 +1062,41 @@ static u64 current_task_runtime_100ns(void)
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
- case HV_X64_MSR_VP_INDEX:
- if (!host)
+ case HV_X64_MSR_VP_INDEX: {
+ struct kvm_hv *hv = &vcpu->kvm->arch.hyperv;
+ int vcpu_idx = kvm_vcpu_get_idx(vcpu);
+ u32 new_vp_index = (u32)data;
+
+ if (!host || new_vp_index >= KVM_MAX_VCPUS)
return 1;
- hv->vp_index = (u32)data;
+
+ if (new_vp_index == hv_vcpu->vp_index)
+ return 0;
+
+ /*
+ * The VP index is initialized to vcpu_index by
+ * kvm_hv_vcpu_postcreate so they initially match. Now the
+ * VP index is changing, adjust num_mismatched_vp_indexes if
+ * it now matches or no longer matches vcpu_idx.
+ */
+ if (hv_vcpu->vp_index == vcpu_idx)
+ atomic_inc(&hv->num_mismatched_vp_indexes);
+ else if (new_vp_index == vcpu_idx)
+ atomic_dec(&hv->num_mismatched_vp_indexes);
+
+ hv_vcpu->vp_index = new_vp_index;
break;
+ }
case HV_X64_MSR_VP_ASSIST_PAGE: {
u64 gfn;
unsigned long addr;
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
- hv->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0))
+ hv_vcpu->hv_vapic = data;
+ if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1062,12 +1104,19 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
if (kvm_is_error_hva(addr))
return 1;
- if (__clear_user((void __user *)addr, PAGE_SIZE))
+
+ /*
+ * Clear apic_assist portion of f(struct hv_vp_assist_page
+ * only, there can be valuable data in the rest which needs
+ * to be preserved e.g. on migration.
+ */
+ if (__clear_user((void __user *)addr, sizeof(u32)))
return 1;
- hv->hv_vapic = data;
+ hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
if (kvm_lapic_enable_pv_eoi(vcpu,
- gfn_to_gpa(gfn) | KVM_MSR_ENABLED))
+ gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
+ sizeof(struct hv_vp_assist_page)))
return 1;
break;
}
@@ -1080,7 +1129,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
case HV_X64_MSR_VP_RUNTIME:
if (!host)
return 1;
- hv->runtime_offset = data - current_task_runtime_100ns();
+ hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1172,11 +1221,11 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
bool host)
{
u64 data = 0;
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv;
switch (msr) {
case HV_X64_MSR_VP_INDEX:
- data = hv->vp_index;
+ data = hv_vcpu->vp_index;
break;
case HV_X64_MSR_EOI:
return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
@@ -1185,10 +1234,10 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
case HV_X64_MSR_TPR:
return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
case HV_X64_MSR_VP_ASSIST_PAGE:
- data = hv->hv_vapic;
+ data = hv_vcpu->hv_vapic;
break;
case HV_X64_MSR_VP_RUNTIME:
- data = current_task_runtime_100ns() + hv->runtime_offset;
+ data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
break;
case HV_X64_MSR_SCONTROL:
case HV_X64_MSR_SVERSION:
@@ -1255,32 +1304,47 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
return kvm_hv_get_msr(vcpu, msr, pdata, host);
}
-static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+static __always_inline unsigned long *sparse_set_to_vcpu_mask(
+ struct kvm *kvm, u64 *sparse_banks, u64 valid_bank_mask,
+ u64 *vp_bitmap, unsigned long *vcpu_bitmap)
{
- int i = 0, j;
+ struct kvm_hv *hv = &kvm->arch.hyperv;
+ struct kvm_vcpu *vcpu;
+ int i, bank, sbank = 0;
- if (!(valid_bank_mask & BIT_ULL(bank_no)))
- return -1;
+ memset(vp_bitmap, 0,
+ KVM_HV_MAX_SPARSE_VCPU_SET_BITS * sizeof(*vp_bitmap));
+ for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
+ KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
+ vp_bitmap[bank] = sparse_banks[sbank++];
- for (j = 0; j < bank_no; j++)
- if (valid_bank_mask & BIT_ULL(j))
- i++;
+ if (likely(!atomic_read(&hv->num_mismatched_vp_indexes))) {
+ /* for all vcpus vp_index == vcpu_idx */
+ return (unsigned long *)vp_bitmap;
+ }
- return i;
+ bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (test_bit(vcpu_to_hv_vcpu(vcpu)->vp_index,
+ (unsigned long *)vp_bitmap))
+ __set_bit(i, vcpu_bitmap);
+ }
+ return vcpu_bitmap;
}
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
u16 rep_cnt, bool ex)
{
struct kvm *kvm = current_vcpu->kvm;
- struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+ struct kvm_vcpu_hv *hv_vcpu = &current_vcpu->arch.hyperv;
struct hv_tlb_flush_ex flush_ex;
struct hv_tlb_flush flush;
- struct kvm_vcpu *vcpu;
- unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
- unsigned long valid_bank_mask = 0;
+ u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ unsigned long *vcpu_mask;
+ u64 valid_bank_mask;
u64 sparse_banks[64];
- int sparse_banks_len, i;
+ int sparse_banks_len;
bool all_cpus;
if (!ex) {
@@ -1290,6 +1354,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
trace_kvm_hv_flush_tlb(flush.processor_mask,
flush.address_space, flush.flags);
+ valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
} else {
@@ -1306,7 +1371,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
all_cpus = flush_ex.hv_vp_set.format !=
HV_GENERIC_SET_SPARSE_4K;
- sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sparse_banks_len =
+ bitmap_weight((unsigned long *)&valid_bank_mask, 64) *
sizeof(sparse_banks[0]);
if (!sparse_banks_len && !all_cpus)
@@ -1321,48 +1387,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
return HV_STATUS_INVALID_HYPERCALL_INPUT;
}
- cpumask_clear(&hv_current->tlb_lush);
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
- int bank = hv->vp_index / 64, sbank = 0;
-
- if (!all_cpus) {
- /* Banks >64 can't be represented */
- if (bank >= 64)
- continue;
-
- /* Non-ex hypercalls can only address first 64 vCPUs */
- if (!ex && bank)
- continue;
-
- if (ex) {
- /*
- * Check is the bank of this vCPU is in sparse
- * set and get the sparse bank number.
- */
- sbank = get_sparse_bank_no(valid_bank_mask,
- bank);
-
- if (sbank < 0)
- continue;
- }
-
- if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
- continue;
- }
+ cpumask_clear(&hv_vcpu->tlb_flush);
- /*
- * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
- * can't analyze it here, flush TLB regardless of the specified
- * address space.
- */
- __set_bit(i, vcpu_bitmap);
- }
+ vcpu_mask = all_cpus ? NULL :
+ sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+ vp_bitmap, vcpu_bitmap);
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
+ * analyze it here, flush TLB regardless of the specified address space.
+ */
kvm_make_vcpus_request_mask(kvm,
KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
- vcpu_bitmap, &hv_current->tlb_lush);
+ vcpu_mask, &hv_vcpu->tlb_flush);
ret_success:
/* We always do full TLB flush, set rep_done = rep_cnt. */
@@ -1370,6 +1407,99 @@ ret_success:
((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
+static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
+ unsigned long *vcpu_bitmap)
+{
+ struct kvm_lapic_irq irq = {
+ .delivery_mode = APIC_DM_FIXED,
+ .vector = vector
+ };
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+ continue;
+
+ /* We fail only when APIC is disabled */
+ kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
+}
+
+static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa,
+ bool ex, bool fast)
+{
+ struct kvm *kvm = current_vcpu->kvm;
+ struct hv_send_ipi_ex send_ipi_ex;
+ struct hv_send_ipi send_ipi;
+ u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+ DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS);
+ unsigned long *vcpu_mask;
+ unsigned long valid_bank_mask;
+ u64 sparse_banks[64];
+ int sparse_banks_len;
+ u32 vector;
+ bool all_cpus;
+
+ if (!ex) {
+ if (!fast) {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi,
+ sizeof(send_ipi))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ sparse_banks[0] = send_ipi.cpu_mask;
+ vector = send_ipi.vector;
+ } else {
+ /* 'reserved' part of hv_send_ipi should be 0 */
+ if (unlikely(ingpa >> 32 != 0))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ sparse_banks[0] = outgpa;
+ vector = (u32)ingpa;
+ }
+ all_cpus = false;
+ valid_bank_mask = BIT_ULL(0);
+
+ trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
+ } else {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &send_ipi_ex,
+ sizeof(send_ipi_ex))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
+ send_ipi_ex.vp_set.format,
+ send_ipi_ex.vp_set.valid_bank_mask);
+
+ vector = send_ipi_ex.vector;
+ valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
+ sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sizeof(sparse_banks[0]);
+
+ all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;
+
+ if (!sparse_banks_len)
+ goto ret_success;
+
+ if (!all_cpus &&
+ kvm_read_guest(kvm,
+ ingpa + offsetof(struct hv_send_ipi_ex,
+ vp_set.bank_contents),
+ sparse_banks,
+ sparse_banks_len))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }
+
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ vcpu_mask = all_cpus ? NULL :
+ sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask,
+ vp_bitmap, vcpu_bitmap);
+
+ kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
+
+ret_success:
+ return HV_STATUS_SUCCESS;
+}
+
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1539,6 +1669,20 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
break;
+ case HVCALL_SEND_IPI:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, false, fast);
+ break;
+ case HVCALL_SEND_IPI_EX:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false);
+ break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index d6aa969e20f1..0e66c12ed2c3 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -62,6 +62,10 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
+bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
+ struct hv_vp_assist_page *assist_page);
+
static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,
int timer_index)
{
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fbb0e6df121b..3cd227ff807f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,6 +70,11 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
+static bool lapic_timer_advance_adjust_done = false;
+#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
+/* step-by-step approximation to mitigate fluctuation */
+#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
+
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -955,14 +960,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
map = rcu_dereference(kvm->arch.apic_map);
ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
- if (ret)
+ if (ret) {
+ *r = 0;
for_each_set_bit(i, &bitmap, 16) {
if (!dst[i])
continue;
- if (*r < 0)
- *r = 0;
*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
}
+ }
rcu_read_unlock();
return ret;
@@ -1472,7 +1477,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- u64 guest_tsc, tsc_deadline;
+ u64 guest_tsc, tsc_deadline, ns;
if (!lapic_in_kernel(vcpu))
return;
@@ -1492,6 +1497,24 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
if (guest_tsc < tsc_deadline)
__delay(min(tsc_deadline - guest_tsc,
nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+
+ if (!lapic_timer_advance_adjust_done) {
+ /* too early */
+ if (guest_tsc < tsc_deadline) {
+ ns = (tsc_deadline - guest_tsc) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns -= min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ } else {
+ /* too late */
+ ns = (guest_tsc - tsc_deadline) * 1000000ULL;
+ do_div(ns, vcpu->arch.virtual_tsc_khz);
+ lapic_timer_advance_ns += min((unsigned int)ns,
+ lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ }
+ if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
+ lapic_timer_advance_adjust_done = true;
+ }
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
@@ -2621,17 +2644,25 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
+ unsigned long new_len;
+
if (!IS_ALIGNED(addr, 4))
return 1;
vcpu->arch.pv_eoi.msr_val = data;
if (!pv_eoi_enabled(vcpu))
return 0;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr, sizeof(u8));
+
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
+
+ return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
}
void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ed0ed39abd36..ff6ef9c3d760 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -120,7 +120,7 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
return vcpu->arch.hyperv.hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_init(void);
void kvm_lapic_exit(void);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e843ec46609d..cf5f572f2305 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -932,7 +932,7 @@ static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
obj = kmem_cache_zalloc(base_cache, GFP_KERNEL);
if (!obj)
- return -ENOMEM;
+ return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = obj;
}
return 0;
@@ -960,7 +960,7 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
page = (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
if (!page)
- return -ENOMEM;
+ return cache->nobjs >= min ? 0 : -ENOMEM;
cache->objects[cache->nobjs++] = page;
}
return 0;
@@ -1265,24 +1265,24 @@ pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
mmu_free_pte_list_desc(desc);
}
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void __pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
struct pte_list_desc *prev_desc;
int i;
if (!rmap_head->val) {
- printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte);
+ pr_err("%s: %p 0->BUG\n", __func__, spte);
BUG();
} else if (!(rmap_head->val & 1)) {
- rmap_printk("pte_list_remove: %p 1->0\n", spte);
+ rmap_printk("%s: %p 1->0\n", __func__, spte);
if ((u64 *)rmap_head->val != spte) {
- printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte);
+ pr_err("%s: %p 1->BUG\n", __func__, spte);
BUG();
}
rmap_head->val = 0;
} else {
- rmap_printk("pte_list_remove: %p many->many\n", spte);
+ rmap_printk("%s: %p many->many\n", __func__, spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
prev_desc = NULL;
while (desc) {
@@ -1296,11 +1296,17 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
prev_desc = desc;
desc = desc->more;
}
- pr_err("pte_list_remove: %p many->many\n", spte);
+ pr_err("%s: %p many->many\n", __func__, spte);
BUG();
}
}
+static void pte_list_remove(struct kvm_rmap_head *rmap_head, u64 *sptep)
+{
+ mmu_spte_clear_track_bits(sptep);
+ __pte_list_remove(sptep, rmap_head);
+}
+
static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level,
struct kvm_memory_slot *slot)
{
@@ -1349,7 +1355,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
sp = page_header(__pa(spte));
gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
rmap_head = gfn_to_rmap(kvm, gfn, sp);
- pte_list_remove(spte, rmap_head);
+ __pte_list_remove(spte, rmap_head);
}
/*
@@ -1685,7 +1691,7 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
while ((sptep = rmap_get_first(rmap_head, &iter))) {
rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep);
- drop_spte(kvm, sptep);
+ pte_list_remove(rmap_head, sptep);
flush = true;
}
@@ -1721,7 +1727,7 @@ restart:
need_flush = 1;
if (pte_write(*ptep)) {
- drop_spte(kvm, sptep);
+ pte_list_remove(rmap_head, sptep);
goto restart;
} else {
new_spte = *sptep & ~PT64_BASE_ADDR_MASK;
@@ -1988,7 +1994,7 @@ static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- pte_list_remove(parent_pte, &sp->parent_ptes);
+ __pte_list_remove(parent_pte, &sp->parent_ptes);
}
static void drop_parent_pte(struct kvm_mmu_page *sp,
@@ -2181,7 +2187,7 @@ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
struct list_head *invalid_list)
{
if (sp->role.cr4_pae != !!is_pae(vcpu)
- || vcpu->arch.mmu.sync_page(vcpu, sp) == 0) {
+ || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) {
kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list);
return false;
}
@@ -2375,14 +2381,14 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
int collisions = 0;
LIST_HEAD(invalid_list);
- role = vcpu->arch.mmu.base_role;
+ role = vcpu->arch.mmu->mmu_role.base;
role.level = level;
role.direct = direct;
if (role.direct)
role.cr4_pae = 0;
role.access = access;
- if (!vcpu->arch.mmu.direct_map
- && vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
+ if (!vcpu->arch.mmu->direct_map
+ && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
role.quadrant = quadrant;
@@ -2457,11 +2463,11 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
{
iterator->addr = addr;
iterator->shadow_addr = root;
- iterator->level = vcpu->arch.mmu.shadow_root_level;
+ iterator->level = vcpu->arch.mmu->shadow_root_level;
if (iterator->level == PT64_ROOT_4LEVEL &&
- vcpu->arch.mmu.root_level < PT64_ROOT_4LEVEL &&
- !vcpu->arch.mmu.direct_map)
+ vcpu->arch.mmu->root_level < PT64_ROOT_4LEVEL &&
+ !vcpu->arch.mmu->direct_map)
--iterator->level;
if (iterator->level == PT32E_ROOT_LEVEL) {
@@ -2469,10 +2475,10 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
* prev_root is currently only used for 64-bit hosts. So only
* the active root_hpa is valid here.
*/
- BUG_ON(root != vcpu->arch.mmu.root_hpa);
+ BUG_ON(root != vcpu->arch.mmu->root_hpa);
iterator->shadow_addr
- = vcpu->arch.mmu.pae_root[(addr >> 30) & 3];
+ = vcpu->arch.mmu->pae_root[(addr >> 30) & 3];
iterator->shadow_addr &= PT64_BASE_ADDR_MASK;
--iterator->level;
if (!iterator->shadow_addr)
@@ -2483,7 +2489,7 @@ static void shadow_walk_init_using_root(struct kvm_shadow_walk_iterator *iterato
static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator,
struct kvm_vcpu *vcpu, u64 addr)
{
- shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu.root_hpa,
+ shadow_walk_init_using_root(iterator, vcpu, vcpu->arch.mmu->root_hpa,
addr);
}
@@ -3095,7 +3101,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int emulate = 0;
gfn_t pseudo_gfn;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return 0;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
@@ -3301,7 +3307,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
u64 spte = 0ull;
uint retry_count = 0;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return false;
if (!page_fault_can_be_fast(error_code))
@@ -3471,11 +3477,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
}
/* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, ulong roots_to_free)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ ulong roots_to_free)
{
int i;
LIST_HEAD(invalid_list);
- struct kvm_mmu *mmu = &vcpu->arch.mmu;
bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
@@ -3535,20 +3541,20 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
struct kvm_mmu_page *sp;
unsigned i;
- if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL) {
+ if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
spin_lock(&vcpu->kvm->mmu_lock);
if(make_mmu_pages_available(vcpu) < 0) {
spin_unlock(&vcpu->kvm->mmu_lock);
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, 0, 0,
- vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
+ vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = __pa(sp->spt);
- } else if (vcpu->arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) {
+ vcpu->arch.mmu->root_hpa = __pa(sp->spt);
+ } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) {
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
spin_lock(&vcpu->kvm->mmu_lock);
@@ -3561,9 +3567,9 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK;
+ vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
} else
BUG();
@@ -3577,7 +3583,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
gfn_t root_gfn;
int i;
- root_gfn = vcpu->arch.mmu.get_cr3(vcpu) >> PAGE_SHIFT;
+ root_gfn = vcpu->arch.mmu->get_cr3(vcpu) >> PAGE_SHIFT;
if (mmu_check_root(vcpu, root_gfn))
return 1;
@@ -3586,8 +3592,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* Do we shadow a long mode page table? If so we need to
* write-protect the guests page table root.
*/
- if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
+ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+ hpa_t root = vcpu->arch.mmu->root_hpa;
MMU_WARN_ON(VALID_PAGE(root));
@@ -3597,11 +3603,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
return -ENOSPC;
}
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
- vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
+ vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL);
root = __pa(sp->spt);
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = root;
+ vcpu->arch.mmu->root_hpa = root;
return 0;
}
@@ -3611,17 +3617,17 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
* the shadow page table may be a PAE or a long mode page table.
*/
pm_mask = PT_PRESENT_MASK;
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL)
+ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL)
pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
MMU_WARN_ON(VALID_PAGE(root));
- if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) {
- pdptr = vcpu->arch.mmu.get_pdptr(vcpu, i);
+ if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) {
+ pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i);
if (!(pdptr & PT_PRESENT_MASK)) {
- vcpu->arch.mmu.pae_root[i] = 0;
+ vcpu->arch.mmu->pae_root[i] = 0;
continue;
}
root_gfn = pdptr >> PAGE_SHIFT;
@@ -3639,16 +3645,16 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
++sp->root_count;
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.pae_root[i] = root | pm_mask;
+ vcpu->arch.mmu->pae_root[i] = root | pm_mask;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root);
/*
* If we shadow a 32 bit page table with a long mode page
* table we enter this path.
*/
- if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_4LEVEL) {
- if (vcpu->arch.mmu.lm_root == NULL) {
+ if (vcpu->arch.mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
+ if (vcpu->arch.mmu->lm_root == NULL) {
/*
* The additional page necessary for this is only
* allocated on demand.
@@ -3660,12 +3666,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
if (lm_root == NULL)
return 1;
- lm_root[0] = __pa(vcpu->arch.mmu.pae_root) | pm_mask;
+ lm_root[0] = __pa(vcpu->arch.mmu->pae_root) | pm_mask;
- vcpu->arch.mmu.lm_root = lm_root;
+ vcpu->arch.mmu->lm_root = lm_root;
}
- vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.lm_root);
+ vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root);
}
return 0;
@@ -3673,7 +3679,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
static int mmu_alloc_roots(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.mmu.direct_map)
+ if (vcpu->arch.mmu->direct_map)
return mmu_alloc_direct_roots(vcpu);
else
return mmu_alloc_shadow_roots(vcpu);
@@ -3684,17 +3690,16 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
int i;
struct kvm_mmu_page *sp;
- if (vcpu->arch.mmu.direct_map)
+ if (vcpu->arch.mmu->direct_map)
return;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
- if (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
-
+ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) {
+ hpa_t root = vcpu->arch.mmu->root_hpa;
sp = page_header(root);
/*
@@ -3725,7 +3730,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
+ hpa_t root = vcpu->arch.mmu->pae_root[i];
if (root && VALID_PAGE(root)) {
root &= PT64_BASE_ADDR_MASK;
@@ -3799,7 +3804,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
int root, leaf;
bool reserved = false;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
goto exit;
walk_shadow_page_lockless_begin(vcpu);
@@ -3816,7 +3821,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
if (!is_shadow_present_pte(spte))
break;
- reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte,
+ reserved |= is_shadow_zero_bits_set(vcpu->arch.mmu, spte,
iterator.level);
}
@@ -3895,7 +3900,7 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr)
struct kvm_shadow_walk_iterator iterator;
u64 spte;
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return;
walk_shadow_page_lockless_begin(vcpu);
@@ -3922,7 +3927,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
if (r)
return r;
- MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa));
+ MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root_hpa));