diff options
Diffstat (limited to 'arch/arm64/kvm')
39 files changed, 1741 insertions, 1216 deletions
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1504c81fbf5d..60fd181df624 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -13,10 +13,10 @@ obj-$(CONFIG_KVM) += hyp/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ $(KVM)/vfio.o $(KVM)/irqchip.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ - inject_fault.o regmap.o va_layout.o handle_exit.o \ + inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ vgic-sys-reg-v3.o fpsimd.o pmu.o \ - aarch32.o arch_timer.o \ + arch_timer.o \ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ vgic/vgic-v3.o vgic/vgic-v4.o \ diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c deleted file mode 100644 index 40a62a99fbf8..000000000000 --- a/arch/arm64/kvm/aarch32.c +++ /dev/null @@ -1,232 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * (not much of an) Emulation layer for 32bit guests. - * - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * based on arch/arm/kvm/emulate.c - * Copyright (C) 2012 - Virtual Open Systems and Columbia University - * Author: Christoffer Dall <c.dall@virtualopensystems.com> - */ - -#include <linux/bits.h> -#include <linux/kvm_host.h> -#include <asm/kvm_emulate.h> -#include <asm/kvm_hyp.h> - -#define DFSR_FSC_EXTABT_LPAE 0x10 -#define DFSR_FSC_EXTABT_nLPAE 0x08 -#define DFSR_LPAE BIT(9) - -/* - * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. 
- */ -static const u8 return_offsets[8][2] = { - [0] = { 0, 0 }, /* Reset, unused */ - [1] = { 4, 2 }, /* Undefined */ - [2] = { 0, 0 }, /* SVC, unused */ - [3] = { 4, 4 }, /* Prefetch abort */ - [4] = { 8, 8 }, /* Data abort */ - [5] = { 0, 0 }, /* HVC, unused */ - [6] = { 4, 4 }, /* IRQ, unused */ - [7] = { 4, 4 }, /* FIQ, unused */ -}; - -static bool pre_fault_synchronize(struct kvm_vcpu *vcpu) -{ - preempt_disable(); - if (vcpu->arch.sysregs_loaded_on_cpu) { - kvm_arch_vcpu_put(vcpu); - return true; - } - - preempt_enable(); - return false; -} - -static void post_fault_synchronize(struct kvm_vcpu *vcpu, bool loaded) -{ - if (loaded) { - kvm_arch_vcpu_load(vcpu, smp_processor_id()); - preempt_enable(); - } -} - -/* - * When an exception is taken, most CPSR fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). - * - * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with - * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was - * obsoleted by the ARMv7 virtualization extensions and is RES0. - * - * For the SPSR layout seen from AArch32, see: - * - ARM DDI 0406C.d, page B1-1148 - * - ARM DDI 0487E.a, page G8-6264 - * - * For the SPSR_ELx layout for AArch32 seen from AArch64, see: - * - ARM DDI 0487E.a, page C5-426 - * - * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from - * MSB to LSB. 
- */ -static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) -{ - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - unsigned long old, new; - - old = *vcpu_cpsr(vcpu); - new = 0; - - new |= (old & PSR_AA32_N_BIT); - new |= (old & PSR_AA32_Z_BIT); - new |= (old & PSR_AA32_C_BIT); - new |= (old & PSR_AA32_V_BIT); - new |= (old & PSR_AA32_Q_BIT); - - // CPSR.IT[7:0] are set to zero upon any exception - // See ARM DDI 0487E.a, section G1.12.3 - // See ARM DDI 0406C.d, section B1.8.3 - - new |= (old & PSR_AA32_DIT_BIT); - - // CPSR.SSBS is set to SCTLR.DSSBS upon any exception - // See ARM DDI 0487E.a, page G8-6244 - if (sctlr & BIT(31)) - new |= PSR_AA32_SSBS_BIT; - - // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 - // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented - // See ARM DDI 0487E.a, page G8-6246 - new |= (old & PSR_AA32_PAN_BIT); - if (!(sctlr & BIT(23))) - new |= PSR_AA32_PAN_BIT; - - // SS does not exist in AArch32, so ignore - - // CPSR.IL is set to zero upon any exception - // See ARM DDI 0487E.a, page G1-5527 - - new |= (old & PSR_AA32_GE_MASK); - - // CPSR.IT[7:0] are set to zero upon any exception - // See prior comment above - - // CPSR.E is set to SCTLR.EE upon any exception - // See ARM DDI 0487E.a, page G8-6245 - // See ARM DDI 0406C.d, page B4-1701 - if (sctlr & BIT(25)) - new |= PSR_AA32_E_BIT; - - // CPSR.A is unchanged upon an exception to Undefined, Supervisor - // CPSR.A is set upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_A_BIT); - if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) - new |= PSR_AA32_A_BIT; - - // CPSR.I is set upon any exception - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // See ARM DDI 0406C.d, page B1-1182 - new |= PSR_AA32_I_BIT; - - // CPSR.F is set upon an exception to FIQ - // CPSR.F is unchanged upon an exception to other modes - // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 - // 
See ARM DDI 0406C.d, page B1-1182 - new |= (old & PSR_AA32_F_BIT); - if (mode == PSR_AA32_MODE_FIQ) - new |= PSR_AA32_F_BIT; - - // CPSR.T is set to SCTLR.TE upon any exception - // See ARM DDI 0487E.a, page G8-5514 - // See ARM DDI 0406C.d, page B1-1181 - if (sctlr & BIT(30)) - new |= PSR_AA32_T_BIT; - - new |= mode; - - return new; -} - -static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) -{ - unsigned long spsr = *vcpu_cpsr(vcpu); - bool is_thumb = (spsr & PSR_AA32_T_BIT); - u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; - u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); - - *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); - - /* Note: These now point to the banked copies */ - vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); - *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; - - /* Branch to exception vector */ - if (sctlr & (1 << 13)) - vect_offset += 0xffff0000; - else /* always have security exceptions */ - vect_offset += vcpu_cp15(vcpu, c12_VBAR); - - *vcpu_pc(vcpu) = vect_offset; -} - -void kvm_inject_undef32(struct kvm_vcpu *vcpu) -{ - bool loaded = pre_fault_synchronize(vcpu); - - prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); - post_fault_synchronize(vcpu, loaded); -} - -/* - * Modelled after TakeDataAbortException() and TakePrefetchAbortException - * pseudocode. 
- */ -static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, - unsigned long addr) -{ - u32 vect_offset; - u32 *far, *fsr; - bool is_lpae; - bool loaded; - - loaded = pre_fault_synchronize(vcpu); - - if (is_pabt) { - vect_offset = 12; - far = &vcpu_cp15(vcpu, c6_IFAR); - fsr = &vcpu_cp15(vcpu, c5_IFSR); - } else { /* !iabt */ - vect_offset = 16; - far = &vcpu_cp15(vcpu, c6_DFAR); - fsr = &vcpu_cp15(vcpu, c5_DFSR); - } - - prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); - - *far = addr; - - /* Give the guest an IMPLEMENTATION DEFINED exception */ - is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); - if (is_lpae) { - *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; - } else { - /* no need to shuffle FS[4] into DFSR[10] as its 0 */ - *fsr = DFSR_FSC_EXTABT_nLPAE; - } - - post_fault_synchronize(vcpu, loaded); -} - -void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, false, addr); -} - -void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) -{ - inject_abt32(vcpu, true, addr); -} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index c0ffb019ca8b..6e637d2b4cfb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -19,6 +19,7 @@ #include <linux/kvm_irqfd.h> #include <linux/irqbypass.h> #include <linux/sched/stat.h> +#include <linux/psci.h> #include <trace/events/kvm.h> #define CREATE_TRACE_POINTS @@ -35,7 +36,6 @@ #include <asm/kvm_asm.h> #include <asm/kvm_mmu.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sections.h> #include <kvm/arm_hypercalls.h> @@ -46,10 +46,14 @@ __asm__(".arch_extension virt"); #endif +static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; +DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); + DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* The VMID used in the 
VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); @@ -61,6 +65,10 @@ static bool vgic_present; static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); +extern u64 kvm_nvhe_sym(__cpu_logical_map)[NR_CPUS]; +extern u32 kvm_nvhe_sym(kvm_host_psci_version); +extern struct psci_0_1_function_ids kvm_nvhe_sym(kvm_host_psci_0_1_function_ids); + int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; @@ -102,7 +110,7 @@ static int kvm_arm_default_max_vcpus(void) return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; } -static void set_default_csv2(struct kvm *kvm) +static void set_default_spectre(struct kvm *kvm) { /* * The default is to expose CSV2 == 1 if the HW isn't affected. @@ -114,6 +122,8 @@ static void set_default_csv2(struct kvm *kvm) */ if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) kvm->arch.pfr0_csv2 = 1; + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) + kvm->arch.pfr0_csv3 = 1; } /** @@ -141,7 +151,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number of VCPUs is limited by the host's GIC model */ kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); - set_default_csv2(kvm); + set_default_spectre(kvm); return ret; out_free_stage2_pgd: @@ -198,6 +208,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: + case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; case KVM_CAP_ARM_SET_DEVICE_ADDR: @@ -229,10 +241,35 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_STEAL_TIME: r = kvm_arm_pvtime_supported(); break; - default: - r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + case KVM_CAP_ARM_EL1_32BIT: + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = get_num_brps(); + break; + case 
KVM_CAP_GUEST_DEBUG_HW_WPS: + r = get_num_wrps(); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_arm_support_pmu_v3(); + break; + case KVM_CAP_ARM_INJECT_SERROR_ESR: + r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN); break; + case KVM_CAP_ARM_VM_IPA_SIZE: + r = get_kvm_ipa_limit(); + break; + case KVM_CAP_ARM_SVE: + r = system_supports_sve(); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = system_has_full_ptr_auth(); + break; + default: + r = 0; } + return r; } @@ -1311,47 +1348,52 @@ static unsigned long nvhe_percpu_order(void) return size ? get_order(size) : 0; } -static int kvm_map_vectors(void) +/* A lookup table holding the hypervisor VA for each vector slot */ +static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS]; + +static int __kvm_vector_slot2idx(enum arm64_hyp_spectre_vector slot) { - /* - * SV2 = ARM64_SPECTRE_V2 - * HEL2 = ARM64_HARDEN_EL2_VECTORS - * - * !SV2 + !HEL2 -> use direct vectors - * SV2 + !HEL2 -> use hardened vectors in place - * !SV2 + HEL2 -> allocate one vector slot and use exec mapping - * SV2 + HEL2 -> use hardened vectors and use exec mapping - */ - if (cpus_have_const_cap(ARM64_SPECTRE_V2)) { - __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs); - __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); - } + return slot - (slot != HYP_VECTOR_DIRECT); +} - if (cpus_have_const_cap(ARM64_HARDEN_EL2_VECTORS)) { - phys_addr_t vect_pa = __pa_symbol(__bp_harden_hyp_vecs); - unsigned long size = __BP_HARDEN_HYP_VECS_SZ; +static void kvm_init_vector_slot(void *base, enum arm64_hyp_spectre_vector slot) +{ + int idx = __kvm_vector_slot2idx(slot); - /* - * Always allocate a spare vector slot, as we don't - * know yet which CPUs have a BP hardening slot that - * we can reuse. 
- */ - __kvm_harden_el2_vector_slot = atomic_inc_return(&arm64_el2_vector_last_slot); - BUG_ON(__kvm_harden_el2_vector_slot >= BP_HARDEN_EL2_SLOTS); - return create_hyp_exec_mappings(vect_pa, size, - &__kvm_bp_vect_base); + hyp_spectre_vector_selector[slot] = base + (idx * SZ_2K); +} + +static int kvm_init_vector_slots(void) +{ + int err; + void *base; + + base = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); + kvm_init_vector_slot(base, HYP_VECTOR_DIRECT); + + base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); + + if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + return 0; + + if (!has_vhe()) { + err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), + __BP_HARDEN_HYP_VECS_SZ, &base); + if (err) + return err; } + kvm_init_vector_slot(base, HYP_VECTOR_INDIRECT); + kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_INDIRECT); return 0; } static void cpu_init_hyp_mode(void) { - phys_addr_t pgd_ptr; - unsigned long hyp_stack_ptr; - unsigned long vector_ptr; - unsigned long tpidr_el2; + struct kvm_nvhe_init_params *params = this_cpu_ptr_nvhe_sym(kvm_init_params); struct arm_smccc_res res; + unsigned long tcr; /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); @@ -1361,13 +1403,38 @@ static void cpu_init_hyp_mode(void) * kernel's mapping to the linear mapping, and store it in tpidr_el2 * so that we can use adr_l to access per-cpu variables in EL2. */ - tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - - (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + params->tpidr_el2 = (unsigned long)this_cpu_ptr_nvhe_sym(__per_cpu_start) - + (unsigned long)kvm_ksym_ref(CHOOSE_NVHE_SYM(__per_cpu_start)); + + params->mair_el2 = read_sysreg(mair_el1); + + /* + * The ID map may be configured to use an extended virtual address + * range. 
This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, at EL2, there is only one TTBR register, and we can't switch + * between translation tables *and* update TCR_EL2.T0SZ at the same + * time. Bottom line: we need to use the extended range with *both* our + * translation tables. + * + * So use the same T0SZ value we use for the ID map. + */ + tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; + tcr &= ~TCR_T0SZ_MASK; + tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + params->tcr_el2 = tcr; + + params->stack_hyp_va = kern_hyp_va(__this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE); + params->pgd_pa = kvm_mmu_get_httbr(); - pgd_ptr = kvm_mmu_get_httbr(); - hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; - hyp_stack_ptr = kern_hyp_va(hyp_stack_ptr); - vector_ptr = (unsigned long)kern_hyp_va(kvm_ksym_ref(__kvm_hyp_host_vector)); + /* + * Flush the init params from the data cache because the struct will + * be read while the MMU is off. + */ + kvm_flush_dcache_to_poc(params, sizeof(*params)); /* * Call initialization code, and switch to the full blown HYP code. @@ -1376,8 +1443,7 @@ static void cpu_init_hyp_mode(void) * cpus_have_const_cap() wrapper. 
*/ BUG_ON(!system_capabilities_finalized()); - arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), - pgd_ptr, tpidr_el2, hyp_stack_ptr, vector_ptr, &res); + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__kvm_hyp_init), virt_to_phys(params), &res); WARN_ON(res.a0 != SMCCC_RET_SUCCESS); /* @@ -1396,13 +1462,40 @@ static void cpu_hyp_reset(void) __hyp_reset_vectors(); } +/* + * EL2 vectors can be mapped and rerouted in a number of ways, + * depending on the kernel configuration and CPU present: + * + * - If the CPU is affected by Spectre-v2, the hardening sequence is + * placed in one of the vector slots, which is executed before jumping + * to the real vectors. + * + * - If the CPU also has the ARM64_SPECTRE_V3A cap, the slot + * containing the hardening sequence is mapped next to the idmap page, + * and executed before jumping to the real vectors. + * + * - If the CPU only has the ARM64_SPECTRE_V3A cap, then an + * empty slot is selected, mapped next to the idmap page, and + * executed before jumping to the real vectors. + * + * Note that ARM64_SPECTRE_V3A is somewhat incompatible with + * VHE, as we don't have hypervisor-specific mappings. If the system + * is VHE and yet selects this capability, it will be ignored. 
+ */ +static void cpu_set_hyp_vector(void) +{ + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + void *vector = hyp_spectre_vector_selector[data->slot]; + + *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)vector; +} + static void cpu_hyp_reinit(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); cpu_hyp_reset(); - - *this_cpu_ptr_hyp_sym(kvm_hyp_vector) = (unsigned long)kvm_get_hyp_vector(); + cpu_set_hyp_vector(); if (is_kernel_in_hyp_mode()) kvm_timer_init_vhe(); @@ -1439,7 +1532,8 @@ static void _kvm_arch_hardware_disable(void *discard) void kvm_arch_hardware_disable(void) { - _kvm_arch_hardware_disable(NULL); + if (!is_protected_kvm_enabled()) + _kvm_arch_hardware_disable(NULL); } #ifdef CONFIG_CPU_PM @@ -1482,11 +1576,13 @@ static struct notifier_block hyp_init_cpu_pm_nb = { static void __init hyp_cpu_pm_init(void) { - cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); } static void __init hyp_cpu_pm_exit(void) { - cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); + if (!is_protected_kvm_enabled()) + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); } #else static inline void hyp_cpu_pm_init(void) @@ -1497,6 +1593,36 @@ static inline void hyp_cpu_pm_exit(void) } #endif +static void init_cpu_logical_map(void) +{ + unsigned int cpu; + + /* + * Copy the MPIDR <-> logical CPU ID mapping to hyp. + * Only copy the set of online CPUs whose features have been checked + * against the finalized system capabilities. The hypervisor will not + * allow any other CPUs from the `possible` set to boot. + */ + for_each_online_cpu(cpu) + kvm_nvhe_sym(__cpu_logical_map)[cpu] = cpu_logical_map(cpu); +} + +static bool init_psci_relay(void) +{ + /* + * If PSCI has not been initialized, protected KVM cannot install + * itself on newly booted CPUs. 
+ */ + if (!psci_ops.get_version) { + kvm_err("Cannot initialize protected mode without PSCI\n"); + return false; + } + + kvm_nvhe_sym(kvm_host_psci_version) = psci_ops.get_version(); + kvm_nvhe_sym(kvm_host_psci_0_1_function_ids) = get_psci_0_1_function_ids(); + return true; +} + static int init_common_resources(void) { return kvm_set_ipa_limit(); @@ -1541,10 +1667,11 @@ static int init_subsystems(void) goto out; kvm_perf_init(); - kvm_coproc_table_init(); + kvm_sys_reg_table_init(); out: - on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + if (err || !is_protected_kvm_enabled()) + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); return err; } @@ -1618,6 +1745,14 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_ro_after_init_start), + kvm_ksym_ref(__hyp_data_ro_after_init_end), + PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map .hyp.data..ro_after_init section\n"); + goto out_err; + } + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { @@ -1632,12 +1767,6 @@ static int init_hyp_mode(void) goto out_err; } - err = kvm_map_vectors(); - if (err) { - kvm_err("Cannot map vectors\n"); - goto out_err; - } - /* * Map the Hyp stack pages */ @@ -1667,6 +1796,13 @@ static int init_hyp_mode(void) } } + if (is_protected_kvm_enabled()) { + init_cpu_logical_map(); + + if (!init_psci_relay()) + goto out_err; + } + return 0; out_err: @@ -1781,14 +1917,24 @@ int kvm_arch_init(void *opaque) goto out_err; } + err = kvm_init_vector_slots(); + if (err) { + kvm_err("Cannot initialise vector slots\n"); + goto out_err; + } + err = init_subsystems(); if (err) goto out_hyp; - if (in_hyp_mode) + if (is_protected_kvm_enabled()) { + static_branch_enable(&kvm_protected_mode_initialized); + kvm_info("Protected nVHE mode initialized successfully\n"); + } else if (in_hyp_mode) { kvm_info("VHE mode initialized successfully\n"); - else + } else { kvm_info("Hyp mode initialized 
successfully\n"); + } return 0; @@ -1806,6 +1952,25 @@ void kvm_arch_exit(void) kvm_perf_teardown(); } +static int __init early_kvm_mode_cfg(char *arg) +{ + if (!arg) + return -EINVAL; + + if (strcmp(arg, "protected") == 0) { + kvm_mode = KVM_MODE_PROTECTED; + return 0; + } + + return -EINVAL; +} +early_param("kvm-arm.mode", early_kvm_mode_cfg); + +enum kvm_mode kvm_get_mode(void) +{ + return kvm_mode; +} + static int arm_init(void) { int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index dfb5218137ca..9bbd30e62799 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -24,7 +24,6 @@ #include <asm/fpsimd.h> #include <asm/kvm.h> #include <asm/kvm_emulate.h> -#include <asm/kvm_coproc.h> #include <asm/sigcontext.h> #include "trace.h" @@ -252,10 +251,32 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) memcpy(addr, valp, KVM_REG_SIZE(reg->id)); if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) { - int i; + int i, nr_reg; + + switch (*vcpu_cpsr(vcpu)) { + /* + * Either we are dealing with user mode, and only the + * first 15 registers (+ PC) must be narrowed to 32bit. + * AArch32 r0-r14 conveniently map to AArch64 x0-x14. + */ + case PSR_AA32_MODE_USR: + case PSR_AA32_MODE_SYS: + nr_reg = 15; + break; + + /* + * Otherwise, this is a privileged mode, and *all* the + * registers must be narrowed to 32bit. 
+ */ + default: + nr_reg = 31; + break; + } + + for (i = 0; i < nr_reg; i++) + vcpu_set_reg(vcpu, i, (u32)vcpu_get_reg(vcpu, i)); - for (i = 0; i < 16; i++) - *vcpu_reg32(vcpu, i) = (u32)*vcpu_reg32(vcpu, i); + *vcpu_pc(vcpu) = (u32)*vcpu_pc(vcpu); } out: return err; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 5d690d60ccad..cebe39f3b1b6 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -14,7 +14,6 @@ #include <asm/esr.h> #include <asm/exception.h> #include <asm/kvm_asm.h> -#include <asm/kvm_coproc.h> #include <asm/kvm_emulate.h> #include <asm/kvm_mmu.h> #include <asm/debug-monitors.h> @@ -61,7 +60,7 @@ static int handle_smc(struct kvm_vcpu *vcpu) * otherwise return to the same address... */ vcpu_set_reg(vcpu, 0, ~0UL); - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -100,7 +99,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu) kvm_clear_request(KVM_REQ_UNHALT, vcpu); } - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); return 1; } @@ -221,7 +220,7 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) * that fail their condition code check" */ if (!kvm_condition_valid(vcpu)) { - kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + kvm_incr_pc(vcpu); handled = 1; } else { exit_handle_fn exit_handler; @@ -241,23 +240,6 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; - if (ARM_SERROR_PENDING(exception_index)) { - u8 esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); - - /* - * HVC/SMC already have an adjusted PC, which we need - * to correct in order to return to after having - * injected the SError. - */ - if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64 || - esr_ec == ESR_ELx_EC_SMC32 || esr_ec == ESR_ELx_EC_SMC64) { - u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 
4 : 2; - *vcpu_pc(vcpu) -= adj; - } - - return 1; - } - exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 4a81eddabcd8..687598e41b21 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) |