summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/hyperv/mmu.c28
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h25
-rw-r--r--arch/x86/include/asm/kvm_emulate.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/emulate.c76
-rw-r--r--arch/x86/kvm/hyperv.c171
-rw-r--r--arch/x86/kvm/lapic.c12
-rw-r--r--arch/x86/kvm/lapic.h14
-rw-r--r--arch/x86/kvm/mmu.c78
-rw-r--r--arch/x86/kvm/svm.c16
-rw-r--r--arch/x86/kvm/trace.h51
-rw-r--r--arch/x86/kvm/vmx.c489
-rw-r--r--arch/x86/kvm/x86.c97
-rw-r--r--arch/x86/kvm/x86.h4
17 files changed, 813 insertions, 268 deletions
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 5f053d7d1bd9..de27615c51ea 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -13,22 +13,6 @@
#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_flush_pcpu {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-};
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_flush_pcpu_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-};
-
/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
@@ -67,8 +51,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int cpu, vcpu, gva_n, max_gvas;
- struct hv_flush_pcpu **flush_pcpu;
- struct hv_flush_pcpu *flush;
+ struct hv_tlb_flush **flush_pcpu;
+ struct hv_tlb_flush *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -82,7 +66,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus,
local_irq_save(flags);
- flush_pcpu = (struct hv_flush_pcpu **)
+ flush_pcpu = (struct hv_tlb_flush **)
this_cpu_ptr(hyperv_pcpu_input_arg);
flush = *flush_pcpu;
@@ -152,8 +136,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int nr_bank = 0, max_gvas, gva_n;
- struct hv_flush_pcpu_ex **flush_pcpu;
- struct hv_flush_pcpu_ex *flush;
+ struct hv_tlb_flush_ex **flush_pcpu;
+ struct hv_tlb_flush_ex *flush;
u64 status = U64_MAX;
unsigned long flags;
@@ -167,7 +151,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
local_irq_save(flags);
- flush_pcpu = (struct hv_flush_pcpu_ex **)
+ flush_pcpu = (struct hv_tlb_flush_ex **)
this_cpu_ptr(hyperv_pcpu_input_arg);
flush = *flush_pcpu;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 3bfa92c2793c..b8c89265baf0 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -308,6 +308,9 @@ struct ms_hyperv_tsc_page {
/* TSC emulation after migration */
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+/* Nested features (CPUID 0x4000000A) EAX */
+#define HV_X64_NESTED_MSR_BITMAP BIT(19)
+
struct hv_reenlightenment_control {
__u64 vector:8;
__u64 reserved1:8;
@@ -678,7 +681,11 @@ struct hv_enlightened_vmcs {
u32 hv_clean_fields;
u32 hv_padding_32;
u32 hv_synthetic_controls;
- u32 hv_enlightenments_control;
+ struct {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 reserved:30;
+ } hv_enlightenments_control;
u32 hv_vp_id;
u64 hv_vm_id;
@@ -734,4 +741,20 @@ struct ipi_arg_ex {
struct hv_vpset vp_set;
};
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+};
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+};
+
#endif
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b24b1c8b3979..0f82cd91cd3c 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -107,11 +107,12 @@ struct x86_emulate_ops {
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
+ * @system:[IN ] Whether the access is forced to be at CPL0.
*/
int (*read_std)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val,
unsigned int bytes,
- struct x86_exception *fault);
+ struct x86_exception *fault, bool system);
/*
* read_phys: Read bytes of standard (non-emulated/special) memory.
@@ -129,10 +130,11 @@ struct x86_emulate_ops {
* @addr: [IN ] Linear address to which to write.
* @val: [OUT] Value write to memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to write to memory.
+ * @system:[IN ] Whether the access is forced to be at CPL0.
*/
int (*write_std)(struct x86_emulate_ctxt *ctxt,
unsigned long addr, void *val, unsigned int bytes,
- struct x86_exception *fault);
+ struct x86_exception *fault, bool system);
/*
* fetch: Read bytes of standard (non-emulated/special) memory.
* Used for instruction fetch.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4b2588865e9..c13cd28d9d1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -258,7 +258,8 @@ union kvm_mmu_page_role {
unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
unsigned ad_disabled:1;
- unsigned :7;
+ unsigned guest_mode:1;
+ unsigned :6;
/*
* This is left at the top of the word so that
@@ -476,6 +477,7 @@ struct kvm_vcpu_hv {
struct kvm_hyperv_exit exit;
struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
+ cpumask_t tlb_lush;
};
struct kvm_vcpu_arch {
@@ -995,7 +997,7 @@ struct kvm_x86_ops {
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
- void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
+ void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector);
int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
@@ -1277,6 +1279,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
struct x86_exception *exception);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 997192131b7b..3cd14311edfa 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -269,7 +269,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset,
return 0;
/*
- * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex
+ * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex
* structs are not cleared between calls, we risk flushing unneeded
* vCPUs otherwise.
*/
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 5db8b0b10766..425e6b8b9547 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -207,7 +207,9 @@ enum vmcs_field {
EPTP_LIST_ADDRESS = 0x00002024,
EPTP_LIST_ADDRESS_HIGH = 0x00002025,
VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
TSC_MULTIPLIER = 0x00002032,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index f4f30d0c25c4..5720e78b2f7b 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -404,7 +404,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
- F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG);
+ F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
+ F(CLDEMOTE);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b3705ae52824..4c4f4263420c 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -812,6 +812,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
return assign_eip_near(ctxt, ctxt->_eip + rel);
}
+static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear,
+ void *data, unsigned size)
+{
+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true);
+}
+
+static int linear_write_system(struct x86_emulate_ctxt *ctxt,
+ ulong linear, void *data,
+ unsigned int size)
+{
+ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true);
+}
+
static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
void *data,
@@ -823,7 +836,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
rc = linearize(ctxt, addr, size, false, &linear);
if (rc != X86EMUL_CONTINUE)
return rc;
- return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception);
+ return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false);
}
static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
@@ -837,7 +850,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt,
rc = linearize(ctxt, addr, size, true, &linear);
if (rc != X86EMUL_CONTINUE)
return rc;
- return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception);
+ return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false);
}
/*
@@ -1496,8 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
return emulate_gp(ctxt, index << 3 | 0x2);
addr = dt.address + index * 8;
- return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc,
- &ctxt->exception);
+ return linear_read_system(ctxt, addr, desc, sizeof *desc);
}
static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
@@ -1560,8 +1572,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE)
return rc;
- return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc),
- &ctxt->exception);
+ return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc));
}
/* allowed just for 8 bytes segments */
@@ -1575,8 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
if (rc != X86EMUL_CONTINUE)
return rc;
- return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc,
- &ctxt->exception);
+ return linear_write_system(ctxt, addr, desc, sizeof *desc);
}
static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
@@ -1737,8 +1747,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
return ret;
}
} else if (ctxt->mode == X86EMUL_MODE_PROT64) {
- ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3,
- sizeof(base3), &ctxt->exception);
+ ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3));
if (ret != X86EMUL_CONTINUE)
return ret;
if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
@@ -2051,11 +2060,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
eip_addr = dt.address + (irq << 2);
cs_addr = dt.address + (irq << 2) + 2;
- rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception);
+ rc = linear_read_system(ctxt, cs_addr, &cs, 2);
if (rc != X86EMUL_CONTINUE)
return rc;
- rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception);
+ rc = linear_read_system(ctxt, eip_addr, &eip, 2);
if (rc != X86EMUL_CONTINUE)
return rc;
@@ -2919,12 +2928,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
#ifdef CONFIG_X86_64
base |= ((u64)base3) << 32;
#endif
- r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL);
+ r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true);
if (r != X86EMUL_CONTINUE)
return false;
if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg))
return false;
- r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL);
+ r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true);
if (r != X86EMUL_CONTINUE)
return false;
if ((perm >> bit_idx) & mask)
@@ -3053,35 +3062,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, u16 old_tss_sel,
ulong old_tss_base, struct desc_struct *new_desc)
{
- const struct x86_emulate_ops *ops = ctxt->ops;
struct tss_segment_16 tss_seg;
int ret;
u32 new_tss_base = get_desc_base(new_desc);
- ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
- &ctxt->exception);
+ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
if (ret != X86EMUL_CONTINUE)
return ret;
save_state_to_tss16(ctxt, &tss_seg);
- ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
- &ctxt->exception);
+ ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
- &ctxt->exception);
+ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
if (ret != X86EMUL_CONTINUE)
return ret;
if (old_tss_sel != 0xffff) {
tss_seg.prev_task_link = old_tss_sel;
- ret = ops->write_std(ctxt, new_tss_base,
- &tss_seg.prev_task_link,
- sizeof tss_seg.prev_task_link,
- &ctxt->exception);
+ ret = linear_write_system(ctxt, new_tss_base,
+ &tss_seg.prev_task_link,
+ sizeof tss_seg.prev_task_link);
if (ret != X86EMUL_CONTINUE)
return ret;
}
@@ -3197,38 +3201,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
u16 tss_selector, u16 old_tss_sel,
ulong old_tss_base, struct desc_struct *new_desc)
{
- const struct x86_emulate_ops *ops = ctxt->ops;
struct tss_segment_32 tss_seg;
int ret;
u32 new_tss_base = get_desc_base(new_desc);
u32 eip_offset = offsetof(struct tss_segment_32, eip);
u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector);
- ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
- &ctxt->exception);
+ ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg);
if (ret != X86EMUL_CONTINUE)
return ret;
save_state_to_tss32(ctxt, &tss_seg);
/* Only GP registers and segment selectors are saved */
- ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
- ldt_sel_offset - eip_offset, &ctxt->exception);
+ ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip,
+ ldt_sel_offset - eip_offset);
if (ret != X86EMUL_CONTINUE)
return ret;
- ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
- &ctxt->exception);
+ ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg);
if (ret != X86EMUL_CONTINUE)
return ret;
if (old_tss_sel != 0xffff) {
tss_seg.prev_task_link = old_tss_sel;
- ret = ops->write_std(ctxt, new_tss_base,
- &tss_seg.prev_task_link,
- sizeof tss_seg.prev_task_link,
- &ctxt->exception);
+ ret = linear_write_system(ctxt, new_tss_base,
+ &tss_seg.prev_task_link,
+ sizeof tss_seg.prev_task_link);
if (ret != X86EMUL_CONTINUE)
return ret;
}
@@ -4189,7 +4189,9 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt)
maxphyaddr = eax & 0xff;
else
maxphyaddr = 36;
- rsvd = rsvd_bits(maxphyaddr, 62);
+ rsvd = rsvd_bits(maxphyaddr, 63);
+ if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE)
+ rsvd &= ~CR3_PCID_INVD;
}
if (new_val & rsvd)
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 46ff64da44ca..af8caf965baa 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1242,6 +1242,121 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return kvm_hv_get_msr(vcpu, msr, pdata);
}
+static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no)
+{
+ int i = 0, j;
+
+ if (!(valid_bank_mask & BIT_ULL(bank_no)))
+ return -1;
+
+ for (j = 0; j < bank_no; j++)
+ if (valid_bank_mask & BIT_ULL(j))
+ i++;
+
+ return i;
+}
+
+static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
+ u16 rep_cnt, bool ex)
+{
+ struct kvm *kvm = current_vcpu->kvm;
+ struct kvm_vcpu_hv *hv_current = &current_vcpu->arch.hyperv;
+ struct hv_tlb_flush_ex flush_ex;
+ struct hv_tlb_flush flush;
+ struct kvm_vcpu *vcpu;
+ unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0};
+ unsigned long valid_bank_mask = 0;
+ u64 sparse_banks[64];
+ int sparse_banks_len, i;
+ bool all_cpus;
+
+ if (!ex) {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_flush_tlb(flush.processor_mask,
+ flush.address_space, flush.flags);
+
+ sparse_banks[0] = flush.processor_mask;
+ all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+ } else {
+ if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
+ sizeof(flush_ex))))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+
+ trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
+ flush_ex.hv_vp_set.format,
+ flush_ex.address_space,
+ flush_ex.flags);
+
+ valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
+ all_cpus = flush_ex.hv_vp_set.format !=
+ HV_GENERIC_SET_SPARSE_4K;
+
+ sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) *
+ sizeof(sparse_banks[0]);
+
+ if (!sparse_banks_len && !all_cpus)
+ goto ret_success;
+
+ if (!all_cpus &&
+ kvm_read_guest(kvm,
+ ingpa + offsetof(struct hv_tlb_flush_ex,
+ hv_vp_set.bank_contents),
+ sparse_banks,
+ sparse_banks_len))
+ return HV_STATUS_INVALID_HYPERCALL_INPUT;
+ }
+
+ cpumask_clear(&hv_current->tlb_lush);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
+ int bank = hv->vp_index / 64, sbank = 0;
+
+ if (!all_cpus) {
+ /* Banks >64 can't be represented */
+ if (bank >= 64)
+ continue;
+
+ /* Non-ex hypercalls can only address first 64 vCPUs */
+ if (!ex && bank)
+ continue;
+
+ if (ex) {
+ /*
+ * Check is the bank of this vCPU is in sparse
+ * set and get the sparse bank number.
+ */
+ sbank = get_sparse_bank_no(valid_bank_mask,
+ bank);
+
+ if (sbank < 0)
+ continue;
+ }
+
+ if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64)))
+ continue;
+ }
+
+ /*
+ * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we
+ * can't analyze it here, flush TLB regardless of the specified
+ * address space.
+ */
+ __set_bit(i, vcpu_bitmap);
+ }
+
+ kvm_make_vcpus_request_mask(kvm,
+ KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP,
+ vcpu_bitmap, &hv_current->tlb_lush);
+
+ret_success:
+ /* We always do full TLB flush, set rep_done = rep_cnt. */
+ return (u64)HV_STATUS_SUCCESS |
+ ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
+}
+
bool kvm_hv_hypercall_enabled(struct kvm *kvm)
{
return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE;
@@ -1315,7 +1430,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
uint16_t code, rep_idx, rep_cnt;
- bool fast, longmode;
+ bool fast, longmode, rep;
/*
* hypercall generates UD from non zero cpl and real mode
@@ -1345,31 +1460,34 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
#endif
code = param & 0xffff;
- fast = (param >> 16) & 0x1;
- rep_cnt = (param >> 32) & 0xfff;
- rep_idx = (param >> 48) & 0xfff;
+ fast = !!(param & HV_HYPERCALL_FAST_BIT);
+ rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
+ rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
+ rep = !!(rep_cnt || rep_idx);
trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa);
- /* Hypercall continuation is not supported yet */
- if (rep_cnt || rep_idx) {
- ret = HV_STATUS_INVALID_HYPERCALL_CODE;
- goto out;
- }
-
switch (code) {
case HVCALL_NOTIFY_LONG_SPIN_WAIT:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
kvm_vcpu_on_spin(vcpu, true);
break;
case HVCALL_SIGNAL_EVENT:
+ if (unlikely(rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
ret = kvm_hvcall_signal_event(vcpu, fast, ingpa);
if (ret != HV_STATUS_INVALID_PORT_ID)
break;
/* maybe userspace knows this conn_id: fall through */
case HVCALL_POST_MESSAGE:
/* don't bother userspace if it has no way to handle it */
- if (!vcpu_to_synic(vcpu)->active) {
- ret = HV_STATUS_INVALID_HYPERCALL_CODE;
+ if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
break;
}
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
@@ -1380,12 +1498,39 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
vcpu->arch.complete_userspace_io =
kvm_hv_hypercall_complete_userspace;
return 0;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
+ if (unlikely(fast || !rep_cnt || rep_idx)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
+ if (unlikely(fast || !rep_cnt || rep_idx)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+ break;
+ case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
+ if (unlikely(fast || rep)) {
+ ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ }
+ ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true);
+ break;
default:
ret = HV_STATUS_INVALID_HYPERCALL_CODE;
break;
}
-out:
return kvm_hv_hypercall_complete(vcpu, ret);
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3773c4625114..b5cd8465d44f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2002,13 +2002,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
}
}
- if ((old_value ^ value) & X2APIC_ENABLE) {
- if (value & X2APIC_ENABLE) {
- kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
- } else
- kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
- }
+ if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
+ kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
+
+ if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
+ kvm_x86_ops->set_virtual_apic_mode(vcpu);
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index edce055e9fd7..ed0ed39abd36 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -16,6 +16,13 @@
#define APIC_BUS_CYCLE_NS 1
#define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS)
+enum lapic_mode {
+ LAPIC_MODE_DISABLED = 0,
+ LAPIC_MODE_INVALID = X2APIC_ENABLE,
+ LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE,
+ LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE,
+};
+
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
@@ -89,6 +96,7 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
+enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -220,4 +228,10 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+
+static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
+{
+ return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+}
+
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d634f0332c0f..d594690d8b95 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -222,7 +222,6 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
static void mmu_spte_set(u64 *sptep, u64 spte);
-static void mmu_free_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
{
@@ -3343,51 +3342,48 @@ out_unlock:
return RET_PF_RETRY;
}
-
-static void mmu_free_roots(struct kvm_vcpu *vcpu)
+static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
+ struct list_head *invalid_list)
{
- int i;
struct kvm_mmu_page *sp;
- LIST_HEAD(invalid_list);
- if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ if (!VALID_PAGE(*root_hpa))
return;
- if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL &&
- (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL ||
- vcpu->arch.mmu.direct_map)) {
- hpa_t root = vcpu->arch.mmu.root_hpa;
+ sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK);
+ --sp->root_count;
+ if (!sp->root_count && sp->role.invalid)
+ kvm_mmu_prepare_zap_page(kvm, sp, invalid_list);
- spin_lock(&vcpu->kvm->mmu_lock);
- sp = page_header(root);
- --sp->root_count;
- if (!sp->root_count && sp->role.invalid) {
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list);
- kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
- }
- spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+ *root_hpa = INVALID_PAGE;
+}
+
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+{
+ int i;
+ LIST_HEAD(invalid_list);
+ struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+ if (!VALID_PAGE(mmu->root_hpa))
return;
- }
spin_lock(&vcpu->kvm->mmu_lock);
- for (i = 0; i < 4; ++i) {
- hpa_t root = vcpu->arch.mmu.pae_root[i];
- if (root) {
- root &= PT64_BASE_ADDR_MASK;
- sp = page_header(root);
- --sp->root_count;
- if (!sp->root_count && sp->role.invalid)
- kvm_mmu_prepare_zap_page(vcpu->kvm, sp,
- &invalid_list);
- }
- vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
+ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+ (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
+ mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
+ } else {
+ for (i = 0; i < 4; ++i)
+ if (mmu->pae_root[i] != 0)
+ mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i],
+ &invalid_list);
+ mmu->root_hpa = INVALID_PAGE;
}
+
kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
spin_unlock(&vcpu->kvm->mmu_lock);
- vcpu->arch.mmu.root_hpa = INVALID_PAGE;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_free_roots);
static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
{
@@ -3720,7 +3716,6 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct)
*/
return RET_PF_RETRY;
}
-EXPORT_SYMBOL_GPL(handle_mmio_page_fault);
static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu,
u32 error_code, gfn_t gfn)
@@ -3812,6 +3807,14 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
struct kvm_memory_slot *slot;
bool async;
+ /*
+ * Don't expose private memslots to L2.
+ */
+ if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) {
+ *pfn = KVM_PFN_NOSLOT;
+ return false;
+ }
+
slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
async = false;
*pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
@@ -3951,7 +3954,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
{
- mmu_free_roots(vcpu);
+ kvm_mmu_free_roots(vcpu);
}
static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4473,6 +4476,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
struct kvm_mmu *context = &vcpu->arch.mmu;
context->base_role.word = 0;
+ context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
context->base_role.ad_disabled = (shadow_accessed_mask == 0);
context->page_fault = tdp_page_fault;
@@ -4539,6 +4543,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
= smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
+ context->base_role.guest_mode = is_guest_mode(vcpu);
context->base_role.smm = is_smm(vcpu);
reset_shadow_zero_bits_mask(vcpu, context);
}
@@ -4564,7 +4569,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->root_hpa = INVALID_PAGE;
context->direct_map = false;
context->base_role.ad_disabled = !accessed_dirty;
-
+ context->base_role.guest_mode = 1;
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
update_last_nonleaf_level(vcpu, context);
@@ -4664,7 +4669,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
void kvm_mmu_unload(struct kvm_vcpu *vcpu)
{
- mmu_free_roots(vcpu);
+ kvm_mmu_free_roots(vcpu);
WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
}
EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -4825,6 +4830,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
mask.smep_andnot_wp = 1;
mask.smap_andnot_wp = 1;
mask.smm = 1;
+ mask.guest_mode = 1;
mask.ad_disabled = 1;
/*
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 950ec50f77c3..695b0bd02220 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1768,7 +1768,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long npages, npinned, size;
unsigned long locked, lock_limit;
struct page **pages;
- int first, last;
+ unsigned long first, last;
+
+ if (ulen == 0 || uaddr + ulen < uaddr)
+ return NULL;
/* Calculate number of pages. */
first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
@@ -1855,13 +1858,13 @@ static void __unregister_enc_region_locked(struct kvm *kvm,
static struct kvm *svm_vm_alloc(void)
{
- struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL);
+ struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm));
return &kvm_svm->kvm;
}
static void svm_vm_free(struct kvm *kvm)
{
- kfree(to_kvm_svm(kvm));
+ vfree(to_kvm_svm(kvm));
}
static void sev_vm_destroy(struct kvm *kvm)
@@ -5062,7 +5065,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
-static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
+static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
return;
}
@@ -6949