author    Radim Krčmář <rkrcmar@redhat.com>  2018-03-28 16:09:09 +0200
committer Radim Krčmář <rkrcmar@redhat.com>  2018-03-28 16:09:09 +0200
commit    abe7a4586f0cf0ef35ed5f713ccfd4eb0770a5ac (patch)
tree      c5f5bae60fdd3973b0c5d14a6405a7b66dbb5121 /virt
parent    d32ef547fdbbeb9c4351f9d3bc84dec998a3be8c (diff)
parent    dc6ed61d2f824a595033744fc1f3bf4cb98768b5 (diff)
Merge tag 'kvm-arm-for-v4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm
KVM/ARM updates for v4.17

- VHE optimizations
- EL2 address space randomization
- Variant 3a mitigation for Cortex-A57 and A72
- The usual vgic fixes
- Various minor tidying-up
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/aarch32.c            2
-rw-r--r--  virt/kvm/arm/arch_timer.c        16
-rw-r--r--  virt/kvm/arm/arm.c               57
-rw-r--r--  virt/kvm/arm/hyp/timer-sr.c      44
-rw-r--r--  virt/kvm/arm/hyp/vgic-v2-sr.c   159
-rw-r--r--  virt/kvm/arm/hyp/vgic-v3-sr.c   246
-rw-r--r--  virt/kvm/arm/mmu.c              180
-rw-r--r--  virt/kvm/arm/pmu.c               36
-rw-r--r--  virt/kvm/arm/vgic/vgic-init.c    17
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c     15
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.c     3
-rw-r--r--  virt/kvm/arm/vgic/vgic-v2.c     163
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c      75
-rw-r--r--  virt/kvm/arm/vgic/vgic.c        120
-rw-r--r--  virt/kvm/arm/vgic/vgic.h          6
15 files changed, 650 insertions(+), 489 deletions(-)
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 8bc479fa37e6..efc84cbe8277 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -178,7 +178,7 @@ static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
*vcpu_cpsr(vcpu) = cpsr;
/* Note: These now point to the banked copies */
- *vcpu_spsr(vcpu) = new_spsr_value;
+ vcpu_write_spsr(vcpu, new_spsr_value);
*vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
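The one-line change above is part of a broader switch from dereferencing register pointers to using read/write accessors, needed once VHE can keep the guest's EL1 system registers live on the CPU while the vCPU is loaded (see the kvm_vcpu_load_sysregs()/kvm_vcpu_put_sysregs() hooks added in arm.c below). A minimal sketch of what such a write accessor has to decide; the field and helper names here are illustrative assumptions, and the real vcpu_write_spsr() lives under arch/ outside this diff:

/*
 * Sketch only: pick between the in-memory copy and the live register,
 * depending on whether the sysregs are currently loaded on the CPU.
 * "sysregs_loaded_on_cpu" is an assumed flag, not taken from this diff.
 */
static void example_write_spsr(struct kvm_vcpu *vcpu, u64 val)
{
	if (vcpu->arch.sysregs_loaded_on_cpu)
		write_sysreg_el1(val, spsr);	/* live EL1 register (VHE) */
	else
		vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = val;	/* saved copy */
}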
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 70f4c30918eb..bd3d57f40f1b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -545,9 +545,11 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
* The kernel may decide to run userspace after calling vcpu_put, so
* we reset cntvoff to 0 to ensure a consistent read between user
* accesses to the virtual counter and kernel access to the physical
- * counter.
+ * counter of non-VHE case. For VHE, the virtual counter uses a fixed
+ * virtual offset of zero, so no need to zero CNTVOFF_EL2 register.
*/
- set_cntvoff(0);
+ if (!has_vhe())
+ set_cntvoff(0);
}
/*
@@ -581,6 +583,7 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
@@ -594,6 +597,9 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
ptimer->cnt_ctl = 0;
kvm_timer_update_state(vcpu);
+ if (timer->enabled && irqchip_in_kernel(vcpu->kvm))
+ kvm_vgic_reset_mapped_irq(vcpu, vtimer->irq.irq);
+
return 0;
}
@@ -767,7 +773,7 @@ int kvm_timer_hyp_init(bool has_gic)
static_branch_enable(&has_gic_active_state);
}
- kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
+ kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING,
"kvm/arm/timer:starting", kvm_timer_starting_cpu,
@@ -852,11 +858,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return ret;
no_vgic:
- preempt_disable();
timer->enabled = 1;
- kvm_timer_vcpu_load(vcpu);
- preempt_enable();
-
return 0;
}
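For context on the set_cntvoff(0) call that is now skipped under VHE: the offset is applied by the hyp helper __kvm_timer_set_cntvoff() shown in timer-sr.c further down, which takes the 64-bit value as two 32-bit halves to fit the hyp call ABI. A sketch of the kernel-side wrapper, reconstructed on that assumption rather than quoted from these hunks:

/* Sketch: split the 64-bit CNTVOFF_EL2 value for the hyp call. */
static void set_cntvoff(u64 cntvoff)
{
	u32 low = lower_32_bits(cntvoff);
	u32 high = upper_32_bits(cntvoff);

	kvm_call_hyp(__kvm_timer_set_cntvoff, low, high);
}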
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86941f6181bb..dba629c5f8ac 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -362,10 +362,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_arm_set_running_vcpu(vcpu);
kvm_vgic_load(vcpu);
kvm_timer_vcpu_load(vcpu);
+ kvm_vcpu_load_sysregs(vcpu);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ kvm_vcpu_put_sysregs(vcpu);
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
@@ -384,14 +386,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- vcpu_load(vcpu);
-
if (vcpu->arch.power_off)
mp_state->mp_state = KVM_MP_STATE_STOPPED;
else
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
- vcpu_put(vcpu);
return 0;
}
@@ -400,8 +399,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
{
int ret = 0;
- vcpu_load(vcpu);
-
switch (mp_state->mp_state) {
case KVM_MP_STATE_RUNNABLE:
vcpu->arch.power_off = false;
@@ -413,7 +410,6 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
ret = -EINVAL;
}
- vcpu_put(vcpu);
return ret;
}
@@ -426,7 +422,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
*/
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
- return ((!!v->arch.irq_lines || kvm_vgic_vcpu_pending_irq(v))
+ bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF);
+ return ((irq_lines || kvm_vgic_vcpu_pending_irq(v))
&& !v->arch.power_off && !v->arch.pause);
}
@@ -638,27 +635,22 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (unlikely(!kvm_vcpu_initialized(vcpu)))
return -ENOEXEC;
- vcpu_load(vcpu);
-
ret = kvm_vcpu_first_run_init(vcpu);
if (ret)
- goto out;
+ return ret;
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu, vcpu->run);
if (ret)
- goto out;
- if (kvm_arm_handle_step_debug(vcpu, vcpu->run)) {
- ret = 0;
- goto out;
- }
-
+ return ret;
+ if (kvm_arm_handle_step_debug(vcpu, vcpu->run))
+ return 0;
}
- if (run->immediate_exit) {
- ret = -EINTR;
- goto out;
- }
+ if (run->immediate_exit)
+ return -EINTR;
+
+ vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
@@ -725,6 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
+ isb(); /* Ensure work in x_flush_hwstate is committed */
kvm_pmu_sync_hwstate(vcpu);
if (static_branch_unlikely(&userspace_irqchip_in_use))
kvm_timer_sync_hwstate(vcpu);
@@ -741,13 +734,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- if (has_vhe())
- kvm_arm_vhe_guest_enter();
-
- ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
- if (has_vhe())
+ if (has_vhe()) {
+ kvm_arm_vhe_guest_enter();
+ ret = kvm_vcpu_run_vhe(vcpu);
kvm_arm_vhe_guest_exit();
+ } else {
+ ret = kvm_call_hyp(__kvm_vcpu_run_nvhe, vcpu);
+ }
+
vcpu->mode = OUTSIDE_GUEST_MODE;
vcpu->stat.exits++;
/*
@@ -817,7 +812,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
kvm_sigset_deactivate(vcpu);
-out:
vcpu_put(vcpu);
return ret;
}
@@ -826,18 +820,18 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
{
int bit_index;
bool set;
- unsigned long *ptr;
+ unsigned long *hcr;
if (number == KVM_ARM_IRQ_CPU_IRQ)
bit_index = __ffs(HCR_VI);
else /* KVM_ARM_IRQ_CPU_FIQ */
bit_index = __ffs(HCR_VF);
- ptr = (unsigned long *)&vcpu->arch.irq_lines;
+ hcr = vcpu_hcr(vcpu);
if (level)
- set = test_and_set_bit(bit_index, ptr);
+ set = test_and_set_bit(bit_index, hcr);
else
- set = test_and_clear_bit(bit_index, ptr);
+ set = test_and_clear_bit(bit_index, hcr);
/*
* If we didn't change anything, no need to wake up or kick other CPUs
@@ -1036,8 +1030,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
struct kvm_device_attr attr;
long r;
- vcpu_load(vcpu);
-
switch (ioctl) {
case KVM_ARM_VCPU_INIT: {
struct kvm_vcpu_init init;
@@ -1114,7 +1106,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EINVAL;
}
- vcpu_put(vcpu);
return r;
}
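Two of the hunks above replace the old irq_lines field with the vcpu's shadow copy of HCR_EL2: kvm_arch_vcpu_runnable() now tests HCR_VI/HCR_VF through vcpu_hcr(), and vcpu_interrupt_line() sets or clears those same bits with test_and_set_bit()/test_and_clear_bit(). A minimal sketch of what the accessor amounts to; the hcr_el2 field name is an assumption, and the real definition sits in the arch headers outside this diff:

/* Sketch: vcpu_hcr() as a pointer to the vcpu's shadow HCR_EL2, so the
 * VI/VF virtual interrupt bits live directly in that word. */
static inline unsigned long *example_vcpu_hcr(struct kvm_vcpu *vcpu)
{
	return (unsigned long *)&vcpu->arch.hcr_el2;
}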
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index f24404b3c8df..77754a62eb0c 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -27,34 +27,34 @@ void __hyp_text __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high)
write_sysreg(cntvoff, cntvoff_el2);
}
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu)
{
- /*
- * We don't need to do this for VHE since the host kernel runs in EL2
- * with HCR_EL2.TGE ==1, which makes those bits have no impact.
- */
- if (!has_vhe()) {
- u64 val;
+ u64 val;
- /* Allow physical timer/counter access for the host */
- val = read_sysreg(cnthctl_el2);
- val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
- write_sysreg(val, cnthctl_el2);
- }
+ /* Allow physical timer/counter access for the host */
+ val = read_sysreg(cnthctl_el2);
+ val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
+ write_sysreg(val, cnthctl_el2);
}
+/*
+ * Should only be called on non-VHE systems.
+ * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe().
+ */
void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu)
{
- if (!has_vhe()) {
- u64 val;
+ u64 val;
- /*
- * Disallow physical timer access for the guest
- * Physical counter access is allowed
- */
- val = read_sysreg(cnthctl_el2);
- val &= ~CNTHCTL_EL1PCEN;
- val |= CNTHCTL_EL1PCTEN;
- write_sysreg(val, cnthctl_el2);
- }
+ /*
+ * Disallow physical timer access for the guest
+ * Physical counter access is allowed
+ */
+ val = read_sysreg(cnthctl_el2);
+ val &= ~CNTHCTL_EL1PCEN;
+ val |= CNTHCTL_EL1PCTEN;
+ write_sysreg(val, cnthctl_el2);
}
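The new comments note that VHE systems configure the EL1 timer traps once, in kvm_timer_init_vhe(), instead of toggling them on every world switch. A rough sketch of that one-time setup, assuming the architectural detail that with HCR_EL2.E2H set the EL1PCTEN/EL1PCEN controls move up by 10 bits in CNTHCTL_EL2; the real function lives in arch_timer.c outside these hunks:

/* Sketch of the one-time VHE setup referenced above (names assumed). */
void example_timer_init_vhe(void)
{
	u32 cnthctl_shift = 10;		/* E2H layout of CNTHCTL_EL2 */
	u64 val;

	/* Disallow physical timer access for the guest,
	 * but keep physical counter access allowed. */
	val = read_sysreg(cnthctl_el2);
	val &= ~(CNTHCTL_EL1PCEN << cnthctl_shift);
	val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
	write_sysreg(val, cnthctl_el2);
}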
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
deleted file mode 100644
index 4fe6e797e8b3..000000000000
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Copyright (C) 2012-2015 - ARM Ltd
- * Author: Marc Zyngier <marc.zyngier@arm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/compiler.h>
-#include <linux/irqchip/arm-gic.h>
-#include <linux/kvm_host.h>
-
-#include <asm/kvm_emulate.h>
-#include <asm/kvm_hyp.h>
-#include <asm/kvm_mmu.h>
-
-static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr;
- u32 elrsr0, elrsr1;
-
- elrsr0 = readl_relaxed(base + GICH_ELRSR0);
- if (unlikely(nr_lr > 32))
- elrsr1 = readl_relaxed(base + GICH_ELRSR1);
- else
- elrsr1 = 0;
-
- cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0;
-}
-
-static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int i;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- for (i = 0; i < used_lrs; i++) {
- if (cpu_if->vgic_elrsr & (1UL << i))
- cpu_if->vgic_lr[i] &= ~GICH_LR_STATE;
- else
- cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4));
-
- writel_relaxed(0, base + GICH_LR0 + (i * 4));
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- if (!base)
- return;
-
- if (used_lrs) {
- cpu_if->vgic_apr = readl_relaxed(base + GICH_APR);
-
- save_elrsr(vcpu, base);
- save_lrs(vcpu, base);
-
- writel_relaxed(0, base + GICH_HCR);
- } else {
- cpu_if->vgic_elrsr = ~0UL;
- cpu_if->vgic_apr = 0;
- }
-}
-
-/* vcpu is already in the HYP VA space */
-void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- struct vgic_dist *vgic = &kvm->arch.vgic;
- void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int i;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
-
- if (!base)
- return;
-
- if (used_lrs) {
- writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR);
- writel_relaxed(cpu_if->vgic_apr, base + GICH_APR);
- for (i = 0; i < used_lrs; i++) {
- writel_relaxed(cpu_if->vgic_lr[i],
- base + GICH_LR0 + (i * 4));
- }
- }
-}
-
-#ifdef CONFIG_ARM64
-/*
- * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the
- * guest.
- *
- * @vcpu: the offending vcpu
- *
- * Returns:
- * 1: GICV access successfully performed
- * 0: Not a GICV access
- * -1: Illegal GICV access
- */
-int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu)
-{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
- struct vgic_dist *vgic = &kvm->arch.vgic;
- phys_addr_t fault_ipa;
- void __iomem *addr;
- int rd;
-
- /* Build the full address */
- fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
- fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
-
- /* If not for GICV, move on */
- if (fault_ipa < vgic->vgic_cpu_base ||
- fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE))
- return 0;
-
- /* Reject anything but a 32bit access */
- if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32))
- return -1;
-
- /* Not aligned? Don't bother */
- if (fault_ipa & 3)
- return -1;
-
- rd = kvm_vcpu_dabt_get_rd(vcpu);
- addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va);
- addr += fault_ipa - vgic->vgic_cpu_base;
-
- if (kvm_vcpu_dabt_iswrite(vcpu)) {
- u32 data = vcpu_data_guest_to_host(vcpu,
- vcpu_get_reg(vcpu, rd),
- sizeof(u32));
- writel_relaxed(data, addr);
- } else {
- u32 data = readl_relaxed(addr);
- vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data,
- sizeof(u32)));
- }
-
- return 1;
-}
-#endif
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index f5c3d6d7019e..616e5a433ab0 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -21,6 +21,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
@@ -208,88 +209,68 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
- u64 val;
/*
* Make sure stores to the GIC via the memory mapped interface
- * are now visible to the system register interface.
+ * are now visible to the system register interface when reading the
+ * LRs, and when reading back the VMCR on non-VHE systems.
*/
- if (!cpu_if->vgic_sre) {
- dsb(st);
- cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ dsb(sy);
+ isb();
+ }
}
if (used_lrs) {
int i;
- u32 nr_pre_bits;
+ u32 elrsr;
- cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
+ elrsr = read_gicreg(ICH_ELSR_EL2);
- write_gicreg(0, ICH_HCR_EL2);
- val = read_gicreg(ICH_VTR_EL2);
- nr_pre_bits = vtr_to_nr_pre_bits(val);
+ write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
for (i = 0; i < used_lrs; i++) {
- if (cpu_if->vgic_elrsr & (1 << i))
+ if (elrsr & (1 << i))
cpu_if->vgic_lr[i] &= ~ICH_LR_STATE;
else
cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
__gic_v3_set_lr(0, i);
}
+ }
+}
- switch (nr_pre_bits) {
- case 7:
- cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
- cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
- case 6:
- cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
- default:
- cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
- }
+void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ int i;
- switch (nr_pre_bits) {
- case 7:
- cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
- cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
- case 6:
- cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
- default:
- cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
- }
- } else {
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
- write_gicreg(0, ICH_HCR_EL2);
-
- cpu_if->vgic_elrsr = 0xffff;
- cpu_if->vgic_ap0r[0] = 0;
- cpu_if->vgic_ap0r[1] = 0;
- cpu_if->vgic_ap0r[2] = 0;
- cpu_if->vgic_ap0r[3] = 0;
- cpu_if->vgic_ap1r[0] = 0;
- cpu_if->vgic_ap1r[1] = 0;
- cpu_if->vgic_ap1r[2] = 0;
- cpu_if->vgic_ap1r[3] = 0;
- }
+ if (used_lrs) {
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
- val = read_gicreg(ICC_SRE_EL2);
- write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+ for (i = 0; i < used_lrs; i++)
+ __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ }
- if (!cpu_if->vgic_sre) {
- /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
- isb();
- write_gicreg(1, ICC_SRE_EL1);
+ /*
+ * Ensure that writes to the LRs, and on non-VHE systems ensure that
+ * the write to the VMCR in __vgic_v3_activate_traps(), will have
+ * reached the (re)distributors. This ensure the guest will read the
+ * correct values from the memory-mapped interface.
+ */
+ if (used_lrs || !has_vhe()) {
+ if (!cpu_if->vgic_sre) {
+ isb();
+ dsb(sy);
+ }
}
}
-void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
+void __hyp_text __vgic_v3_activate_traps(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs;
- u64 val;
- u32 nr_pre_bits;
- int i;
/*
* VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
@@ -298,70 +279,135 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
* consequences. So we must make sure that ICC_SRE_EL1 has
* been actually programmed with the value we want before
* starting to mess with the rest of the GIC, and VMCR_EL2 in
- * particular.
+ * particular. This logic must be called before
+ * __vgic_v3_restore_state().
*/
if (!cpu_if->vgic_sre) {
write_gicreg(0, ICC_SRE_EL1);
isb();
write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
+
+
+ if (has_vhe()) {
+ /*
+ * Ensure that the write to the VMCR will have reached
+ * the (re)distributors. This ensure the guest will
+ * read the correct values from the memory-mapped
+ * interface.
+ */
+ isb();
+ dsb(sy);
+ }
}
- val = read_gicreg(ICH_VTR_EL2);
- nr_pre_bits = vtr_to_nr_pre_bits(val);
+ /*
+ * Prevent the guest from touching the GIC system registers if
+ * SRE isn't enabled for GICv3 emulation.
+ */
+ write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
+ ICC_SRE_EL2);
- if (used_lrs) {
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected,
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+}
- switch (nr_pre_bits) {
- case 7:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
- case 6:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
- default:
- __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
- }
-
- switch (nr_pre_bits) {
- case 7:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
- case 6:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
- default:
- __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
- }
+void __hyp_text __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ u64 val;
- for (i = 0; i < used_lrs; i++)
- __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
- } else {
- /*
- * If we need to trap system registers, we must write
- * ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected. Same thing if GICv4 is used, as VLPI
- * delivery is gated by ICH_HCR_EL2.En.
- */
- if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
- cpu_if->its_vpe.its_vm)
- write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
+ if (!cpu_if->vgic_sre) {
+ cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
}
- /*
- * Ensures that the above will have reached the
- * (re)distributors. This ensure the guest will read the
- * correct values from the memory-mapped interface.
- */
+ val = read_gicreg(ICC_SRE_EL2);
+ write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2);
+
if (!cpu_if->vgic_sre) {
+ /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */
isb();
- dsb(sy);
+ write_gicreg(1, ICC_SRE_EL1);
}
/*
- * Prevent the guest from touching the GIC system registers if
- * SRE isn't enabled for GICv3 emulation.
+ * If we were trapping system registers, we enabled the VGIC even if
+ * no interrupts were being injected, and we disable it again here.
*/
- write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE,
- ICC_SRE_EL2);
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
+ write_gicreg(0, ICH_HCR_EL2);
+}
+
+void __hyp_text __vgic_v3_save_aprs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ u64 val;
+ u32 nr_pre_bits;
+
+ vcpu = kern_hyp_va(vcpu);
+ cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+ cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
+ case 6:
+ cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
+ default:
+ cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+ cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
+ case 6:
+ cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
+ default:
+ cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
+ }
+}
+
+void __hyp_text __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpu_if;
+ u64 val;
+ u32 nr_pre_bits;
+
+ vcpu = kern_hyp_va(vcpu);
+ cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ val = read_gicreg(ICH_VTR_EL2);
+ nr_pre_bits = vtr_to_nr_pre_bits(val);
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
+ case 6:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
+ default:
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
+ }
+
+ switch (nr_pre_bits) {
+ case 7:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
+ case 6:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
+ default:
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
+ }
}
void __hyp_text __vgic_v3_init_lrs(void)
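The single save/restore pair is now split into save/restore of the list registers, activate/deactivate of the trap configuration, and separate save/restore of the active-priority registers. Below is one plausible ordering around a guest run, respecting the constraint in the comments that ICC_SRE_EL1 and ICH_VMCR_EL2 must be programmed before the list registers are reloaded; this is a rough sketch only, and the real callers are in the world-switch and vcpu load/put paths outside these hunks:

/* Rough ordering sketch; not the actual switch code. */
static void example_vgic_v3_flow(struct kvm_vcpu *vcpu)
{
	__vgic_v3_restore_aprs(vcpu);		/* active priorities */
	__vgic_v3_activate_traps(vcpu);		/* ICC_SRE_EL1, ICH_VMCR_EL2, ICH_HCR_EL2 */
	__vgic_v3_restore_state(vcpu);		/* list registers */

	/* ... run the guest ... */

	__vgic_v3_save_state(vcpu);		/* drain the list registers */
	__vgic_v3_deactivate_traps(vcpu);	/* give the sysregs back to the host */
	__vgic_v3_save_aprs(vcpu);
}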
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index ec62d1cccab7..7f6a944db23d 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -43,6 +43,8 @@ static unsigned long hyp_idmap_start;
static unsigned long hyp_idmap_end;
static phys_addr_t hyp_idmap_vector;
+static unsigned long io_map_base;
+
#define S2_PGD_SIZE (PTRS_PER_S2_PGD * sizeof(pgd_t))
#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t))
@@ -479,7 +481,13 @@ static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end)
clear_hyp_pgd_entry(pgd);
}
-static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd)
+{
+ return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1);
+}
+
+static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd,
+ phys_addr_t start, u64 size)
{
pgd_t *pgd;
phys_addr_t addr = start, end = start + size;
@@ -489,7 +497,7 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
* We don't unmap anything from HYP, except at the hyp tear down.
* Hence, we don't have to invalidate the TLBs here.
*/
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
do {
next = pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -497,32 +505,50 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
} while (pgd++, addr = next, addr != end);
}
+static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size);
+}
+
+static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size)
+{
+ __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size);
+}
+
/**
* free_hyp_pgds - free Hyp-mode page tables
*
* Assumes hyp_pgd is a page table used strictly in Hyp-mode and
* therefore contains either mappings in the kernel memory area (above
- * PAGE_OFFSET), or device mappings in the vmalloc range (from
- * VMALLOC_START to VMALLOC_END).
+ * PAGE_OFFSET), or device mappings in the idmap range.
*
- * boot_hyp_pgd should only map two pages for the init code.
+ * boot_hyp_pgd should only map the idmap range, and is only used in
+ * the extended idmap case.
*/
void free_hyp_pgds(void)
{
+ pgd_t *id_pgd;
+
mutex_lock(&kvm_hyp_pgd_mutex);
+ id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd;
+
+ if (id_pgd) {
+ /* In case we never called hyp_mmu_init() */
+ if (!io_map_base)
+ io_map_base = hyp_idmap_start;
+ unmap_hyp_idmap_range(id_pgd, io_map_base,
+ hyp_idmap_start + PAGE_SIZE - io_map_base);
+ }
+
if (boot_hyp_pgd) {
- unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE);
free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order);
boot_hyp_pgd = NULL;
}
if (hyp_pgd) {
- unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE);
unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET),
(uintptr_t)high_memory - PAGE_OFFSET);
- unmap_hyp_range(hyp_pgd, kern_hyp_va(VMALLOC_START),
- VMALLOC_END - VMALLOC_START);
free_pages((unsigned long)hyp_pgd, hyp_pgd_order);
hyp_pgd = NULL;
@@ -634,7 +660,7 @@ static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd,
addr = start & PAGE_MASK;
end = PAGE_ALIGN(end);
do {
- pgd = pgdp + ((addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1));
+ pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd);
if (pgd_none(*pgd)) {
pud = pud_alloc_one(NULL, addr);
@@ -708,29 +734,115 @@ int create_hyp_mappings(void *from, void *to, pgprot_t prot)
return 0;
}
+static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size,
+ unsigned long *haddr, pgprot_t prot)
+{
+ pgd_t *pgd = hyp_pgd;
+ unsigned long base;
+ int ret = 0;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+
+ /*
+ * This assumes that we we have enough space below the idmap
+ * page to allocate our VAs. If not, the check below will
+ * kick. A potential alternative would be to detect that
+ * overflow and switch to an allocation above the idmap.
+ *
+ * The allocated size is always a multiple of PAGE_SIZE.
+ */
+ size = PAGE_ALIGN(size + offset_in_page(phys_addr));
+ base = io_map_base - size;
+
+ /*
+ * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+ * allocating the new area, as it would indicate we've
+ * overflowed the idmap/IO address range.
+ */
+ if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+ ret = -ENOMEM;
+ else
+ io_map_base = base;
+
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+
+ if (ret)
+ goto out;
+
+ if (__kvm_cpu_uses_extended_idmap())
+ pgd = boot_hyp_pgd;
+
+ ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(),
+ base, base + size,
+ __phys_to_pfn(phys_addr), prot);
+ if (ret)
+ goto out;
+
+ *haddr = base + offset_in_page(phys_addr);
+
+out:
+ return ret;
+}
+
/**
- * create_hyp_io_mappings - duplicate a kernel IO mapping into Hyp mode
- * @from: The kernel start VA of the range
- * @to: The kernel end VA of the range (exclusive)
+ * create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
- *
- * The resulting HYP VA is the same as the kernel VA, modulo
- * HYP_PAGE_OFFSET.
+ * @size: Size of the region being mapped
+ * @kaddr: Kernel VA for this mapping
+ * @haddr: HYP VA for this mapping
*/
-int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
+int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
+ void __iomem **kaddr,
+ void __iomem **haddr)
{
- unsigned long start = kern_hyp_va((unsigned long)from);
- unsigned long end = kern_hyp_va((unsigned long)to);
+ unsigned long addr;
+ int ret;
- if (is_kernel_in_hyp_mode())
+ *kaddr = ioremap(phys_addr, size);
+ if (!*kaddr)
+ return -ENOMEM;
+
+ if (is_kernel_in_hyp_mode()) {
+ *haddr = *kaddr;
return 0;
+ }
- /* Check for a valid kernel IO mapping */
- if (!is_vmalloc_addr(from) || !is_vmalloc_addr(to - 1))
- return -EINVAL;
+ ret = __create_hyp_private_mapping(phys_addr, size,
+ &addr, PAGE_HYP_DEVICE);
+ if (ret) {
+ iounmap(*kaddr);
+ *kaddr = NULL;
+ *haddr = NULL;
+ return ret;
+ }
+
+ *haddr = (void __iomem *)addr;
+ return 0;
+}
- return __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, start, end,
- __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
+/**
+ * create_hyp_exec_mappings - Map an executable range into HYP
+ * @phys_addr: The physical start address which gets mapped
+ * @size: Size of the region being mapped
+ * @haddr: HYP VA for this mapping
+ */
+int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
+ void **haddr)
+{
+ unsigned long addr;
+ int ret;
+
+ BUG_ON(is_kernel_in_hyp_mode());
+
+ ret = __create_hyp_private_mapping(phys_addr, size,
+ &addr, PAGE_HYP_EXEC);
+ if (ret) {
+ *haddr = NULL;
+ return ret;
+ }
+
+ *haddr = (void *)addr;
+ return 0;
}
/**
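The reworked create_hyp_io_mappings() now performs the ioremap() itself and returns both addresses, with the HYP VA allocated downwards from io_map_base (or equal to the kernel VA when the kernel already runs at EL2). A short usage sketch for a caller mapping a device region; the variable names are placeholders, and the real callers are updated elsewhere in this series:

/* Usage sketch: one call yields both the kernel and the HYP mapping. */
static int example_map_device(phys_addr_t phys_base, size_t size)
{
	void __iomem *kaddr;
	void __iomem *haddr;
	int ret;

	ret = create_hyp_io_mappings(phys_base, size, &kaddr, &haddr);
	if (ret)
		return ret;

	/* kaddr: ioremap()'d kernel VA; haddr: HYP VA for use at EL2. */
	return 0;
}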
@@ -1801,7 +1913,9 @@ int kvm_mmu_init(void)
int err;
hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);