diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/hyperv/mmu.c | 28 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 25 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_emulate.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 7 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/vmx.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/emulate.c | 76 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 171 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.c | 12 | ||||
-rw-r--r-- | arch/x86/kvm/lapic.h | 14 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 78 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 51 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 489 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 97 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 4 |
17 files changed, 813 insertions, 268 deletions
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 5f053d7d1bd9..de27615c51ea 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -13,22 +13,6 @@ #define CREATE_TRACE_POINTS #include <asm/trace/hyperv.h> -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_flush_pcpu { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -}; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_flush_pcpu_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -}; - /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) @@ -67,8 +51,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { int cpu, vcpu, gva_n, max_gvas; - struct hv_flush_pcpu **flush_pcpu; - struct hv_flush_pcpu *flush; + struct hv_tlb_flush **flush_pcpu; + struct hv_tlb_flush *flush; u64 status = U64_MAX; unsigned long flags; @@ -82,7 +66,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = (struct hv_flush_pcpu **) + flush_pcpu = (struct hv_tlb_flush **) this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -152,8 +136,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, const struct flush_tlb_info *info) { int nr_bank = 0, max_gvas, gva_n; - struct hv_flush_pcpu_ex **flush_pcpu; - struct hv_flush_pcpu_ex *flush; + struct hv_tlb_flush_ex **flush_pcpu; + struct hv_tlb_flush_ex *flush; u64 status = U64_MAX; unsigned long flags; @@ -167,7 +151,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = (struct hv_flush_pcpu_ex **) + flush_pcpu = (struct hv_tlb_flush_ex **) this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 3bfa92c2793c..b8c89265baf0 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -308,6 +308,9 @@ struct ms_hyperv_tsc_page { /* TSC emulation after migration */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +/* Nested features (CPUID 0x4000000A) EAX */ +#define HV_X64_NESTED_MSR_BITMAP BIT(19) + struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; @@ -678,7 +681,11 @@ struct hv_enlightened_vmcs { u32 hv_clean_fields; u32 hv_padding_32; u32 hv_synthetic_controls; - u32 hv_enlightenments_control; + struct { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 reserved:30; + } hv_enlightenments_control; u32 hv_vp_id; u64 hv_vm_id; @@ -734,4 +741,20 @@ struct ipi_arg_ex { struct hv_vpset vp_set; }; +/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ +struct hv_tlb_flush { + u64 address_space; + u64 flags; + u64 processor_mask; + u64 gva_list[]; +}; + +/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ +struct hv_tlb_flush_ex { + u64 address_space; + u64 flags; + struct hv_vpset hv_vp_set; + u64 gva_list[]; +}; + #endif diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index b24b1c8b3979..0f82cd91cd3c 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -107,11 +107,12 @@ struct x86_emulate_ops { * @addr: [IN ] Linear address from which to read. * @val: [OUT] Value read from memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to read from memory. + * @system:[IN ] Whether the access is forced to be at CPL0. */ int (*read_std)(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); + struct x86_exception *fault, bool system); /* * read_phys: Read bytes of standard (non-emulated/special) memory. @@ -129,10 +130,11 @@ struct x86_emulate_ops { * @addr: [IN ] Linear address to which to write. * @val: [OUT] Value write to memory, zero-extended to 'u_long'. * @bytes: [IN ] Number of bytes to write to memory. + * @system:[IN ] Whether the access is forced to be at CPL0. */ int (*write_std)(struct x86_emulate_ctxt *ctxt, unsigned long addr, void *val, unsigned int bytes, - struct x86_exception *fault); + struct x86_exception *fault, bool system); /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4b2588865e9..c13cd28d9d1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -258,7 +258,8 @@ union kvm_mmu_page_role { unsigned smep_andnot_wp:1; unsigned smap_andnot_wp:1; unsigned ad_disabled:1; - unsigned :7; + unsigned guest_mode:1; + unsigned :6; /* * This is left at the top of the word so that @@ -476,6 +477,7 @@ struct kvm_vcpu_hv { struct kvm_hyperv_exit exit; struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + cpumask_t tlb_lush; }; struct kvm_vcpu_arch { @@ -995,7 +997,7 @@ struct kvm_x86_ops { void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); + void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); @@ -1277,6 +1279,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); int kvm_mmu_load(struct kvm_vcpu *vcpu); void kvm_mmu_unload(struct kvm_vcpu *vcpu); void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu); gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 997192131b7b..3cd14311edfa 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -269,7 +269,7 @@ static inline int cpumask_to_vpset(struct hv_vpset *vpset, return 0; /* - * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * Clear all banks up to the maximum possible bank as hv_tlb_flush_ex * structs are not cleared between calls, we risk flushing unneeded * vCPUs otherwise. */ diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5db8b0b10766..425e6b8b9547 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -207,7 +207,9 @@ enum vmcs_field { EPTP_LIST_ADDRESS = 0x00002024, EPTP_LIST_ADDRESS_HIGH = 0x00002025, VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, XSS_EXIT_BITMAP = 0x0000202C, XSS_EXIT_BITMAP_HIGH = 0x0000202D, TSC_MULTIPLIER = 0x00002032, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f4f30d0c25c4..5720e78b2f7b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -404,7 +404,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_cpuid_7_0_ecx_x86_features = F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | - F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG); + F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | + F(CLDEMOTE); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b3705ae52824..4c4f4263420c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -812,6 +812,19 @@ static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) return assign_eip_near(ctxt, ctxt->_eip + rel); } +static int linear_read_system(struct x86_emulate_ctxt *ctxt, ulong linear, + void *data, unsigned size) +{ + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, true); +} + +static int linear_write_system(struct x86_emulate_ctxt *ctxt, + ulong linear, void *data, + unsigned int size) +{ + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, true); +} + static int segmented_read_std(struct x86_emulate_ctxt *ctxt, struct segmented_address addr, void *data, @@ -823,7 +836,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); + return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception, false); } static int segmented_write_std(struct x86_emulate_ctxt *ctxt, @@ -837,7 +850,7 @@ static int segmented_write_std(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception); + return ctxt->ops->write_std(ctxt, linear, data, size, &ctxt->exception, false); } /* @@ -1496,8 +1509,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt, return emulate_gp(ctxt, index << 3 | 0x2); addr = dt.address + index * 8; - return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, - &ctxt->exception); + return linear_read_system(ctxt, addr, desc, sizeof *desc); } static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, @@ -1560,8 +1572,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->read_std(ctxt, *desc_addr_p, desc, sizeof(*desc), - &ctxt->exception); + return linear_read_system(ctxt, *desc_addr_p, desc, sizeof(*desc)); } /* allowed just for 8 bytes segments */ @@ -1575,8 +1586,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, - &ctxt->exception); + return linear_write_system(ctxt, addr, desc, sizeof *desc); } static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, @@ -1737,8 +1747,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, return ret; } } else if (ctxt->mode == X86EMUL_MODE_PROT64) { - ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, - sizeof(base3), &ctxt->exception); + ret = linear_read_system(ctxt, desc_addr+8, &base3, sizeof(base3)); if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | @@ -2051,11 +2060,11 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) eip_addr = dt.address + (irq << 2); cs_addr = dt.address + (irq << 2) + 2; - rc = ops->read_std(ctxt, cs_addr, &cs, 2, &ctxt->exception); + rc = linear_read_system(ctxt, cs_addr, &cs, 2); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->read_std(ctxt, eip_addr, &eip, 2, &ctxt->exception); + rc = linear_read_system(ctxt, eip_addr, &eip, 2); if (rc != X86EMUL_CONTINUE) return rc; @@ -2919,12 +2928,12 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, #ifdef CONFIG_X86_64 base |= ((u64)base3) << 32; #endif - r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL); + r = ops->read_std(ctxt, base + 102, &io_bitmap_ptr, 2, NULL, true); if (r != X86EMUL_CONTINUE) return false; if (io_bitmap_ptr + port/8 > desc_limit_scaled(&tr_seg)) return false; - r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL); + r = ops->read_std(ctxt, base + io_bitmap_ptr + port/8, &perm, 2, NULL, true); if (r != X86EMUL_CONTINUE) return false; if ((perm >> bit_idx) & mask) @@ -3053,35 +3062,30 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_16 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); - ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, - &ctxt->exception); + ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); if (ret != X86EMUL_CONTINUE) return ret; save_state_to_tss16(ctxt, &tss_seg); - ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, - &ctxt->exception); + ret = linear_write_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); if (ret != X86EMUL_CONTINUE) return ret; - ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, - &ctxt->exception); + ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); if (ret != X86EMUL_CONTINUE) return ret; if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(ctxt, new_tss_base, - &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link, - &ctxt->exception); + ret = linear_write_system(ctxt, new_tss_base, + &tss_seg.prev_task_link, + sizeof tss_seg.prev_task_link); if (ret != X86EMUL_CONTINUE) return ret; } @@ -3197,38 +3201,34 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { - const struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_32 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); u32 eip_offset = offsetof(struct tss_segment_32, eip); u32 ldt_sel_offset = offsetof(struct tss_segment_32, ldt_selector); - ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, - &ctxt->exception); + ret = linear_read_system(ctxt, old_tss_base, &tss_seg, sizeof tss_seg); if (ret != X86EMUL_CONTINUE) return ret; save_state_to_tss32(ctxt, &tss_seg); /* Only GP registers and segment selectors are saved */ - ret = ops->write_std(ctxt, old_tss_base + eip_offset, &tss_seg.eip, - ldt_sel_offset - eip_offset, &ctxt->exception); + ret = linear_write_system(ctxt, old_tss_base + eip_offset, &tss_seg.eip, + ldt_sel_offset - eip_offset); if (ret != X86EMUL_CONTINUE) return ret; - ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg, - &ctxt->exception); + ret = linear_read_system(ctxt, new_tss_base, &tss_seg, sizeof tss_seg); if (ret != X86EMUL_CONTINUE) return ret; if (old_tss_sel != 0xffff) { tss_seg.prev_task_link = old_tss_sel; - ret = ops->write_std(ctxt, new_tss_base, - &tss_seg.prev_task_link, - sizeof tss_seg.prev_task_link, - &ctxt->exception); + ret = linear_write_system(ctxt, new_tss_base, + &tss_seg.prev_task_link, + sizeof tss_seg.prev_task_link); if (ret != X86EMUL_CONTINUE) return ret; } @@ -4189,7 +4189,9 @@ static int check_cr_write(struct x86_emulate_ctxt *ctxt) maxphyaddr = eax & 0xff; else maxphyaddr = 36; - rsvd = rsvd_bits(maxphyaddr, 62); + rsvd = rsvd_bits(maxphyaddr, 63); + if (ctxt->ops->get_cr(ctxt, 4) & X86_CR4_PCIDE) + rsvd &= ~CR3_PCID_INVD; } if (new_val & rsvd) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 46ff64da44ca..af8caf965baa 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1242,6 +1242,121 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) return kvm_hv_get_msr(vcpu, msr, pdata); } +static __always_inline int get_sparse_bank_no(u64 valid_bank_mask, int bank_no) +{ + int i = 0, j; + + if (!(valid_bank_mask & BIT_ULL(bank_no))) + return -1; + + for (j = 0; j < bank_no; j++) + if (valid_bank_mask & BIT_ULL(j)) + i++; + + return i; +} + +static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, + u16 rep_cnt, bool ex) +{ + struct kvm *kvm = current_vcpu->kvm; + struct kvm_vcpu_hv *hv_current = ¤t_vcpu->arch.hyperv; + struct hv_tlb_flush_ex flush_ex; + struct hv_tlb_flush flush; + struct kvm_vcpu *vcpu; + unsigned long vcpu_bitmap[BITS_TO_LONGS(KVM_MAX_VCPUS)] = {0}; + unsigned long valid_bank_mask = 0; + u64 sparse_banks[64]; + int sparse_banks_len, i; + bool all_cpus; + + if (!ex) { + if (unlikely(kvm_read_guest(kvm, ingpa, &flush, sizeof(flush)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + trace_kvm_hv_flush_tlb(flush.processor_mask, + flush.address_space, flush.flags); + + sparse_banks[0] = flush.processor_mask; + all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS; + } else { + if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex, + sizeof(flush_ex)))) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + + trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, + flush_ex.hv_vp_set.format, + flush_ex.address_space, + flush_ex.flags); + + valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask; + all_cpus = flush_ex.hv_vp_set.format != + HV_GENERIC_SET_SPARSE_4K; + + sparse_banks_len = bitmap_weight(&valid_bank_mask, 64) * + sizeof(sparse_banks[0]); + + if (!sparse_banks_len && !all_cpus) + goto ret_success; + + if (!all_cpus && + kvm_read_guest(kvm, + ingpa + offsetof(struct hv_tlb_flush_ex, + hv_vp_set.bank_contents), + sparse_banks, + sparse_banks_len)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } + + cpumask_clear(&hv_current->tlb_lush); + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; + int bank = hv->vp_index / 64, sbank = 0; + + if (!all_cpus) { + /* Banks >64 can't be represented */ + if (bank >= 64) + continue; + + /* Non-ex hypercalls can only address first 64 vCPUs */ + if (!ex && bank) + continue; + + if (ex) { + /* + * Check is the bank of this vCPU is in sparse + * set and get the sparse bank number. + */ + sbank = get_sparse_bank_no(valid_bank_mask, + bank); + + if (sbank < 0) + continue; + } + + if (!(sparse_banks[sbank] & BIT_ULL(hv->vp_index % 64))) + continue; + } + + /* + * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we + * can't analyze it here, flush TLB regardless of the specified + * address space. + */ + __set_bit(i, vcpu_bitmap); + } + + kvm_make_vcpus_request_mask(kvm, + KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, + vcpu_bitmap, &hv_current->tlb_lush); + +ret_success: + /* We always do full TLB flush, set rep_done = rep_cnt. */ + return (u64)HV_STATUS_SUCCESS | + ((u64)rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); +} + bool kvm_hv_hypercall_enabled(struct kvm *kvm) { return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; @@ -1315,7 +1430,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS; uint16_t code, rep_idx, rep_cnt; - bool fast, longmode; + bool fast, longmode, rep; /* * hypercall generates UD from non zero cpl and real mode @@ -1345,31 +1460,34 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) #endif code = param & 0xffff; - fast = (param >> 16) & 0x1; - rep_cnt = (param >> 32) & 0xfff; - rep_idx = (param >> 48) & 0xfff; + fast = !!(param & HV_HYPERCALL_FAST_BIT); + rep_cnt = (param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff; + rep_idx = (param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; + rep = !!(rep_cnt || rep_idx); trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); - /* Hypercall continuation is not supported yet */ - if (rep_cnt || rep_idx) { - ret = HV_STATUS_INVALID_HYPERCALL_CODE; - goto out; - } - switch (code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: + if (unlikely(rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } kvm_vcpu_on_spin(vcpu, true); break; case HVCALL_SIGNAL_EVENT: + if (unlikely(rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } ret = kvm_hvcall_signal_event(vcpu, fast, ingpa); if (ret != HV_STATUS_INVALID_PORT_ID) break; /* maybe userspace knows this conn_id: fall through */ case HVCALL_POST_MESSAGE: /* don't bother userspace if it has no way to handle it */ - if (!vcpu_to_synic(vcpu)->active) { - ret = HV_STATUS_INVALID_HYPERCALL_CODE; + if (unlikely(rep || !vcpu_to_synic(vcpu)->active)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; break; } vcpu->run->exit_reason = KVM_EXIT_HYPERV; @@ -1380,12 +1498,39 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) vcpu->arch.complete_userspace_io = kvm_hv_hypercall_complete_userspace; return 0; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST: + if (unlikely(fast || !rep_cnt || rep_idx)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); + break; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE: + if (unlikely(fast || rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, false); + break; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX: + if (unlikely(fast || !rep_cnt || rep_idx)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); + break; + case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX: + if (unlikely(fast || rep)) { + ret = HV_STATUS_INVALID_HYPERCALL_INPUT; + break; + } + ret = kvm_hv_flush_tlb(vcpu, ingpa, rep_cnt, true); + break; default: ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; } -out: return kvm_hv_hypercall_complete(vcpu, ret); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3773c4625114..b5cd8465d44f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2002,13 +2002,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) } } - if ((old_value ^ value) & X2APIC_ENABLE) { - if (value & X2APIC_ENABLE) { - kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); - } else - kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); - } + if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE)) + kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id); + + if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) + kvm_x86_ops->set_virtual_apic_mode(vcpu); apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index edce055e9fd7..ed0ed39abd36 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -16,6 +16,13 @@ #define APIC_BUS_CYCLE_NS 1 #define APIC_BUS_FREQUENCY (1000000000ULL / APIC_BUS_CYCLE_NS) +enum lapic_mode { + LAPIC_MODE_DISABLED = 0, + LAPIC_MODE_INVALID = X2APIC_ENABLE, + LAPIC_MODE_XAPIC = MSR_IA32_APICBASE_ENABLE, + LAPIC_MODE_X2APIC = MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE, +}; + struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ @@ -89,6 +96,7 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s); +enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); @@ -220,4 +228,10 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); + +static inline enum lapic_mode kvm_apic_mode(u64 apic_base) +{ + return apic_base & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); +} + #endif diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d634f0332c0f..d594690d8b95 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -222,7 +222,6 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; static void mmu_spte_set(u64 *sptep, u64 spte); -static void mmu_free_roots(struct kvm_vcpu *vcpu); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) { @@ -3343,51 +3342,48 @@ out_unlock: return RET_PF_RETRY; } - -static void mmu_free_roots(struct kvm_vcpu *vcpu) +static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, + struct list_head *invalid_list) { - int i; struct kvm_mmu_page *sp; - LIST_HEAD(invalid_list); - if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + if (!VALID_PAGE(*root_hpa)) return; - if (vcpu->arch.mmu.shadow_root_level >= PT64_ROOT_4LEVEL && - (vcpu->arch.mmu.root_level >= PT64_ROOT_4LEVEL || - vcpu->arch.mmu.direct_map)) { - hpa_t root = vcpu->arch.mmu.root_hpa; + sp = page_header(*root_hpa & PT64_BASE_ADDR_MASK); + --sp->root_count; + if (!sp->root_count && sp->role.invalid) + kvm_mmu_prepare_zap_page(kvm, sp, invalid_list); - spin_lock(&vcpu->kvm->mmu_lock); - sp = page_header(root); - --sp->root_count; - if (!sp->root_count && sp->role.invalid) { - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, &invalid_list); - kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); - } - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.root_hpa = INVALID_PAGE; + *root_hpa = INVALID_PAGE; +} + +void kvm_mmu_free_roots(struct kvm_vcpu *vcpu) +{ + int i; + LIST_HEAD(invalid_list); + struct kvm_mmu *mmu = &vcpu->arch.mmu; + + if (!VALID_PAGE(mmu->root_hpa)) return; - } spin_lock(&vcpu->kvm->mmu_lock); - for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu.pae_root[i]; - if (root) { - root &= PT64_BASE_ADDR_MASK; - sp = page_header(root); - --sp->root_count; - if (!sp->root_count && sp->role.invalid) - kvm_mmu_prepare_zap_page(vcpu->kvm, sp, - &invalid_list); - } - vcpu->arch.mmu.pae_root[i] = INVALID_PAGE; + if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && + (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) { + mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list); + } else { + for (i = 0; i < 4; ++i) + if (mmu->pae_root[i] != 0) + mmu_free_root_page(vcpu->kvm, &mmu->pae_root[i], + &invalid_list); + mmu->root_hpa = INVALID_PAGE; } + kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu.root_hpa = INVALID_PAGE; } +EXPORT_SYMBOL_GPL(kvm_mmu_free_roots); static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) { @@ -3720,7 +3716,6 @@ static int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct) */ return RET_PF_RETRY; } -EXPORT_SYMBOL_GPL(handle_mmio_page_fault); static bool page_fault_handle_page_track(struct kvm_vcpu *vcpu, u32 error_code, gfn_t gfn) @@ -3812,6 +3807,14 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, struct kvm_memory_slot *slot; bool async; + /* + * Don't expose private memslots to L2. + */ + if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) { + *pfn = KVM_PFN_NOSLOT; + return false; + } + slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); async = false; *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); @@ -3951,7 +3954,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu) { - mmu_free_roots(vcpu); + kvm_mmu_free_roots(vcpu); } static unsigned long get_cr3(struct kvm_vcpu *vcpu) @@ -4473,6 +4476,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) struct kvm_mmu *context = &vcpu->arch.mmu; context->base_role.word = 0; + context->base_role.guest_mode = is_guest_mode(vcpu); context->base_role.smm = is_smm(vcpu); context->base_role.ad_disabled = (shadow_accessed_mask == 0); context->page_fault = tdp_page_fault; @@ -4539,6 +4543,7 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) = smep && !is_write_protection(vcpu); context->base_role.smap_andnot_wp = smap && !is_write_protection(vcpu); + context->base_role.guest_mode = is_guest_mode(vcpu); context->base_role.smm = is_smm(vcpu); reset_shadow_zero_bits_mask(vcpu, context); } @@ -4564,7 +4569,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, context->root_hpa = INVALID_PAGE; context->direct_map = false; context->base_role.ad_disabled = !accessed_dirty; - + context->base_role.guest_mode = 1; update_permission_bitmask(vcpu, context, true); update_pkru_bitmask(vcpu, context, true); update_last_nonleaf_level(vcpu, context); @@ -4664,7 +4669,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load); void kvm_mmu_unload(struct kvm_vcpu *vcpu) { - mmu_free_roots(vcpu); + kvm_mmu_free_roots(vcpu); WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa)); } EXPORT_SYMBOL_GPL(kvm_mmu_unload); @@ -4825,6 +4830,7 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mask.smep_andnot_wp = 1; mask.smap_andnot_wp = 1; mask.smm = 1; + mask.guest_mode = 1; mask.ad_disabled = 1; /* diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 950ec50f77c3..695b0bd02220 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1768,7 +1768,10 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, unsigned long npages, npinned, size; unsigned long locked, lock_limit; struct page **pages; - int first, last; + unsigned long first, last; + + if (ulen == 0 || uaddr + ulen < uaddr) + return NULL; /* Calculate number of pages. */ first = (uaddr & PAGE_MASK) >> PAGE_SHIFT; @@ -1855,13 +1858,13 @@ static void __unregister_enc_region_locked(struct kvm *kvm, static struct kvm *svm_vm_alloc(void) { - struct kvm_svm *kvm_svm = kzalloc(sizeof(struct kvm_svm), GFP_KERNEL); + struct kvm_svm *kvm_svm = vzalloc(sizeof(struct kvm_svm)); return &kvm_svm->kvm; } static void svm_vm_free(struct kvm *kvm) { - kfree(to_kvm_svm(kvm)); + vfree(to_kvm_svm(kvm)); } static void sev_vm_destroy(struct kvm *kvm) @@ -5062,7 +5065,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +static void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) { return; } @@ -6949 |