diff options
71 files changed, 1633 insertions, 1212 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 254bfafdcbcd..ef66b3c45ba2 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5804,8 +5804,9 @@ panic() code such as dumping handler. xen_nopvspin [X86,XEN] - Disables the ticketlock slowpath using Xen PV - optimizations. + Disables the qspinlock slowpath using Xen PV optimizations. + This parameter is obsoleted by "nopvspin" parameter, which + has equivalent effect for XEN platform. xen_nopv [X86] Disables the PV optimizations forcing the HVM guest to @@ -5831,6 +5832,11 @@ as generic guest with no PV drivers. Currently support XEN HVM, KVM, HYPER_V and VMWARE guest. + nopvspin [X86,XEN,KVM] + Disables the qspinlock slow path using PV optimizations + which allow the hypervisor to 'idle' the guest on lock + contention. + xirc2ps_cs= [NET,PCMCIA] Format: <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]] diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe05201e17be..cdfd98155311 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -669,6 +669,10 @@ MSRs that have been set successfully. Defines the vcpu responses to the cpuid instruction. Applications should use the KVM_SET_CPUID2 ioctl if available. +Note, when this IOCTL fails, KVM gives no guarantees that previous valid CPUID +configuration (if there is) is not corrupted. Userspace can get a copy of the +resulting CPUID configuration through KVM_GET_CPUID2 in case. + :: struct kvm_cpuid_entry { @@ -4795,6 +4799,7 @@ hardware_exit_reason. /* KVM_EXIT_FAIL_ENTRY */ struct { __u64 hardware_entry_failure_reason; + __u32 cpu; /* if KVM_LAST_CPU */ } fail_entry; If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due diff --git a/arch/arm64/include/asm/kvm_coproc.h b/arch/arm64/include/asm/kvm_coproc.h index 0185ee8b8b5e..454373704b8a 100644 --- a/arch/arm64/include/asm/kvm_coproc.h +++ b/arch/arm64/include/asm/kvm_coproc.h @@ -27,12 +27,12 @@ struct kvm_sys_reg_target_table { void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table); -int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp14_64(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp15_32(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_cp15_64(struct kvm_vcpu *vcpu, struct kvm_run *run); -int kvm_handle_sys_reg(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_cp14_load_store(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp14_64(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_32(struct kvm_vcpu *vcpu); +int kvm_handle_cp15_64(struct kvm_vcpu *vcpu); +int kvm_handle_sys_reg(struct kvm_vcpu *vcpu); #define kvm_coproc_table_init kvm_sys_reg_table_init void kvm_sys_reg_table_init(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e21d4a01372f..f81151ad3d3c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -97,17 +97,6 @@ struct kvm_arch { bool return_nisv_io_abort_to_user; }; -#define KVM_NR_MEM_OBJS 40 - -/* - * We don't want allocation failures within the mmu code, so we preallocate - * enough memory for a single page fault in a cache. - */ -struct kvm_mmu_memory_cache { - int nobjs; - void *objects[KVM_NR_MEM_OBJS]; -}; - struct kvm_vcpu_fault_info { u32 esr_el2; /* Hyp Syndrom Register */ u64 far_el2; /* Hyp Fault Address Register */ @@ -486,18 +475,15 @@ u64 __kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); -int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index); -void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index); +int handle_exit(struct kvm_vcpu *vcpu, int exception_index); +void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index); /* MMIO helpers */ void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data); unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len); -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa); +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu); +int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa); int kvm_perf_init(void); int kvm_perf_teardown(void); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b12bfc1f051a..40be8f6c7351 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -139,7 +139,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); -int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_types.h b/arch/arm64/include/asm/kvm_types.h new file mode 100644 index 000000000000..9a126b9e2d7c --- /dev/null +++ b/arch/arm64/include/asm/kvm_types.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_KVM_TYPES_H +#define _ASM_ARM64_KVM_TYPES_H + +#define KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE 40 + +#endif /* _ASM_ARM64_KVM_TYPES_H */ + diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 90cb90561446..73e12869afe3 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -270,6 +270,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.target = -1; bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; + /* Set up the timer */ kvm_timer_vcpu_init(vcpu); @@ -658,7 +660,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return ret; if (run->exit_reason == KVM_EXIT_MMIO) { - ret = kvm_handle_mmio_return(vcpu, run); + ret = kvm_handle_mmio_return(vcpu); if (ret) return ret; } @@ -810,11 +812,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* Exit types that need handling before we can be preempted */ - handle_exit_early(vcpu, run, ret); + handle_exit_early(vcpu, ret); preempt_enable(); - ret = handle_exit(vcpu, run, ret); + ret = handle_exit(vcpu, ret); } /* Tell userspace about in-kernel device output levels */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5a02d4c90559..1df3beafd73f 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -25,7 +25,7 @@ #define CREATE_TRACE_POINTS #include "trace_handle_exit.h" -typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); +typedef int (*exit_handle_fn)(struct kvm_vcpu *); static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr) { @@ -33,7 +33,7 @@ static void kvm_handle_guest_serror(struct kvm_vcpu *vcpu, u32 esr) kvm_inject_vabt(vcpu); } -static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int handle_hvc(struct kvm_vcpu *vcpu) { int ret; @@ -50,7 +50,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } -static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int handle_smc(struct kvm_vcpu *vcpu) { /* * "If an SMC instruction executed at Non-secure EL1 is @@ -69,7 +69,7 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) * Guest access to FP/ASIMD registers are routed to this handler only * when the system doesn't support FP/ASIMD. */ -static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int handle_no_fpsimd(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); return 1; @@ -87,7 +87,7 @@ static int handle_no_fpsimd(struct kvm_vcpu *vcpu, struct kvm_run *run) * world-switches and schedule other host processes until there is an * incoming IRQ or FIQ to the VM. */ -static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_wfx(struct kvm_vcpu *vcpu) { if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); @@ -109,16 +109,16 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) * kvm_handle_guest_debug - handle a debug exception instruction * * @vcpu: the vcpu pointer - * @run: access to the kvm_run structure for results * * We route all debug exceptions through the same handler. If both the * guest and host are using the same debug facilities it will be up to * userspace to re-inject the correct exception for guest delivery. * - * @return: 0 (while setting run->exit_reason), -1 for error + * @return: 0 (while setting vcpu->run->exit_reason), -1 for error */ -static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; u32 hsr = kvm_vcpu_get_hsr(vcpu); int ret = 0; @@ -144,7 +144,7 @@ static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } -static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu) { u32 hsr = kvm_vcpu_get_hsr(vcpu); @@ -155,7 +155,7 @@ static int kvm_handle_unknown_ec(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int handle_sve(struct kvm_vcpu *vcpu) { /* Until SVE is supported for guests: */ kvm_inject_undefined(vcpu); @@ -167,7 +167,7 @@ static int handle_sve(struct kvm_vcpu *vcpu, struct kvm_run *run) * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all * that we can do is give the guest an UNDEF. */ -static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu) { kvm_inject_undefined(vcpu); return 1; @@ -212,7 +212,7 @@ static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) * KVM_EXIT_DEBUG, otherwise userspace needs to complete its * emulation first. */ -static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) +static int handle_trap_exceptions(struct kvm_vcpu *vcpu) { int handled; @@ -227,7 +227,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) exit_handle_fn exit_handler; exit_handler = kvm_get_exit_handler(vcpu); - handled = exit_handler(vcpu, run); + handled = exit_handler(vcpu); } return handled; @@ -237,9 +237,10 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu, struct kvm_run *run) * Return > 0 to return to guest, < 0 on error, 0 (and set exit_reason) on * proper exit to userspace. */ -int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index) +int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { + struct kvm_run *run = vcpu->run; + if (ARM_SERROR_PENDING(exception_index)) { u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); @@ -265,7 +266,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, case ARM_EXCEPTION_EL1_SERROR: return 1; case ARM_EXCEPTION_TRAP: - return handle_trap_exceptions(vcpu, run); + return handle_trap_exceptions(vcpu); case ARM_EXCEPTION_HYP_GONE: /* * EL2 has been reset to the hyp-stub. This happens when a guest @@ -289,8 +290,7 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, } /* For exit types that need handling before we can be preempted */ -void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run, - int exception_index) +void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) { if (ARM_SERROR_PENDING(exception_index)) { if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) { diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 4e0366759726..158fbe682611 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -77,9 +77,8 @@ unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) * or in-kernel IO emulation * * @vcpu: The VCPU pointer - * @run: The VCPU run struct containing the mmio data */ -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu) { unsigned long data; unsigned int len; @@ -92,6 +91,8 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; if (!kvm_vcpu_dabt_iswrite(vcpu)) { + struct kvm_run *run = vcpu->run; + len = kvm_vcpu_dabt_get_as(vcpu); data = kvm_mmio_read_buf(run->mmio.data, len); @@ -119,9 +120,9 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, - phys_addr_t fault_ipa) +int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) { + struct kvm_run *run = vcpu->run; unsigned long data; unsigned long rt; int ret; @@ -188,7 +189,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, if (!is_write) memcpy(run->mmio.data, data_buf, len); vcpu->stat.mmio_exit_kernel++; - kvm_handle_mmio_return(vcpu, run); + kvm_handle_mmio_return(vcpu); return 1; } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 31058e6e7c2a..7a7ddc4558a7 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -124,38 +124,6 @@ static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) put_page(virt_to_page(pudp)); } -static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, - int min, int max) -{ - void *page; - - BUG_ON(max > KVM_NR_MEM_OBJS); - if (cache->nobjs >= min) - return 0; - while (cache->nobjs < max) { - page = (void *)__get_free_page(GFP_PGTABLE_USER); - if (!page) - return -ENOMEM; - cache->objects[cache->nobjs++] = page; - } - return 0; -} - -static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) -{ - while (mc->nobjs) - free_page((unsigned long)mc->objects[--mc->nobjs]); -} - -static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) -{ - void *p; - - BUG_ON(!mc || !mc->nobjs); - p = mc->objects[--mc->nobjs]; - return p; -} - static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) { p4d_t *p4d_table __maybe_unused = stage2_p4d_offset(kvm, pgd, 0UL); @@ -1132,7 +1100,7 @@ static p4d_t *stage2_get_p4d(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (stage2_pgd_none(kvm, *pgd)) { if (!cache) return NULL; - p4d = mmu_memory_cache_alloc(cache); + p4d = kvm_mmu_memory_cache_alloc(cache); stage2_pgd_populate(kvm, pgd, p4d); get_page(virt_to_page(pgd)); } @@ -1150,7 +1118,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (stage2_p4d_none(kvm, *p4d)) { if (!cache) return NULL; - pud = mmu_memory_cache_alloc(cache); + pud = kvm_mmu_memory_cache_alloc(cache); stage2_p4d_populate(kvm, p4d, pud); get_page(virt_to_page(p4d)); } @@ -1171,7 +1139,7 @@ static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache if (stage2_pud_none(kvm, *pud)) { if (!cache) return NULL; - pmd = mmu_memory_cache_alloc(cache); + pmd = kvm_mmu_memory_cache_alloc(cache); stage2_pud_populate(kvm, pud, pmd); get_page(virt_to_page(pud)); } @@ -1377,7 +1345,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, if (stage2_pud_none(kvm, *pud)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ - pmd = mmu_memory_cache_alloc(cache); + pmd = kvm_mmu_memory_cache_alloc(cache); stage2_pud_populate(kvm, pud, pmd); get_page(virt_to_page(pud)); } @@ -1402,7 +1370,7 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, if (pmd_none(*pmd)) { if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ - pte = mmu_memory_cache_alloc(cache); + pte = kvm_mmu_memory_cache_alloc(cache); kvm_pmd_populate(pmd, pte); get_page(virt_to_page(pmd)); } @@ -1469,7 +1437,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t addr, end; int ret = 0; unsigned long pfn; - struct kvm_mmu_memory_cache cache = { 0, }; + struct kvm_mmu_memory_cache cache = { 0, __GFP_ZERO, NULL, }; end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK; pfn = __phys_to_pfn(pa); @@ -1480,9 +1448,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, if (writable) pte = kvm_s2pte_mkwrite(pte); - ret = mmu_topup_memory_cache(&cache, - kvm_mmu_cache_min_pages(kvm), - KVM_NR_MEM_OBJS); + ret = kvm_mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm)); if (ret) goto out; spin_lock(&kvm->mmu_lock); @@ -1496,7 +1463,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, } out: - mmu_free_memory_cache(&cache); + kvm_mmu_free_memory_cache(&cache); return ret; } @@ -1882,8 +1849,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, mmap_read_unlock(current->mm); /* We need minimum second+third level pages */ - ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), - KVM_NR_MEM_OBJS); + ret = kvm_mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm)); if (ret) return ret; @@ -2050,7 +2016,6 @@ out: /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer - * @run: the kvm_run structure * * Any abort that gets to the host is almost guaranteed to be caused by a * missing second stage translation table entry, which can mean that either the @@ -2059,7 +2024,7 @@ out: * space. The distinction is based on the IPA causing the fault and whether this * memory region has been registered as standard RAM by user space. */ -int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) { unsigned long fault_status; phys_addr_t fault_ipa; @@ -2138,7 +2103,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) * of the page size. */ fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); - ret = io_mem_abort(vcpu, run, fault_ipa); + ret = io_mem_abort(vcpu, fault_ipa); goto out_unlock; } @@ -2307,7 +2272,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long h |