summaryrefslogtreecommitdiffstats
path: root/virt
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-07-06 18:38:31 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-07-06 18:38:31 -0700
commitc136b84393d4e340e1b53fc7f737dd5827b19ee5 (patch)
tree985a1bdfafe7ec5ce2d3c738f601cad3998d8ce9 /virt
parente0f25a3f2d052e36ff67a9b4db835c3e27e950d8 (diff)
parent1372324b328cd5dabaef5e345e37ad48c63df2a9 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "PPC: - Better machine check handling for HV KVM - Ability to support guests with threads=2, 4 or 8 on POWER9 - Fix for a race that could cause delayed recognition of signals - Fix for a bug where POWER9 guests could sleep with interrupts pending. ARM: - VCPU request overhaul - allow timer and PMU to have their interrupt number selected from userspace - workaround for Cavium erratum 30115 - handling of memory poisonning - the usual crop of fixes and cleanups s390: - initial machine check forwarding - migration support for the CMMA page hinting information - cleanups and fixes x86: - nested VMX bugfixes and improvements - more reliable NMI window detection on AMD - APIC timer optimizations Generic: - VCPU request overhaul + documentation of common code patterns - kvm_stat improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (124 commits) Update my email address kvm: vmx: allow host to access guest MSR_IA32_BNDCFGS x86: kvm: mmu: use ept a/d in vmcs02 iff used in vmcs12 kvm: x86: mmu: allow A/D bits to be disabled in an mmu x86: kvm: mmu: make spte mmio mask more explicit x86: kvm: mmu: dead code thanks to access tracking KVM: PPC: Book3S: Fix typo in XICS-on-XIVE state saving code KVM: PPC: Book3S HV: Close race with testing for signals on guest entry KVM: PPC: Book3S HV: Simplify dynamic micro-threading code KVM: x86: remove ignored type attribute KVM: LAPIC: Fix lapic timer injection delay KVM: lapic: reorganize restart_apic_timer KVM: lapic: reorganize start_hv_timer kvm: nVMX: Check memory operand to INVVPID KVM: s390: Inject machine check into the nested guest KVM: s390: Inject machine check into the guest tools/kvm_stat: add new interactive command 'b' tools/kvm_stat: add new command line switch '-i' tools/kvm_stat: fix error on interactive command 'g' KVM: SVM: suppress unnecessary NMI singlestep on GIF=0 and nested exit ...
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/arm/aarch32.c2
-rw-r--r--virt/kvm/arm/arch_timer.c139
-rw-r--r--virt/kvm/arm/arm.c82
-rw-r--r--virt/kvm/arm/hyp/vgic-v3-sr.c823
-rw-r--r--virt/kvm/arm/mmu.c23
-rw-r--r--virt/kvm/arm/pmu.c117
-rw-r--r--virt/kvm/arm/psci.c8
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c2
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c24
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c22
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c68
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h12
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c45
-rw-r--r--virt/kvm/arm/vgic/vgic.c68
-rw-r--r--virt/kvm/kvm_main.c12
15 files changed, 1297 insertions, 150 deletions
diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c
index 528af4b2d09e..79c7c357804b 100644
--- a/virt/kvm/arm/aarch32.c
+++ b/virt/kvm/arm/aarch32.c
@@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = {
/*
* Check if a trapped instruction should have been executed or not.
*/
-bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
+bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu)
{
unsigned long cpsr;
u32 cpsr_cond;
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5976609ef27c..8e89d63005c7 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -21,6 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/irq.h>
+#include <linux/uaccess.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
@@ -35,6 +36,16 @@ static struct timecounter *timecounter;
static unsigned int host_vtimer_irq;
static u32 host_vtimer_irq_flags;
+static const struct kvm_irq_level default_ptimer_irq = {
+ .irq = 30,
+ .level = 1,
+};
+
+static const struct kvm_irq_level default_vtimer_irq = {
+ .irq = 27,
+ .level = 1,
+};
+
void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
{
vcpu_vtimer(vcpu)->active_cleared_last = false;
@@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
* If the vcpu is blocked we want to wake it up so that it will see
* the timer has expired when entering the guest.
*/
- kvm_vcpu_kick(vcpu);
+ kvm_vcpu_wake_up(vcpu);
}
static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
@@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
if (likely(irqchip_in_kernel(vcpu->kvm))) {
ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
timer_ctx->irq.irq,
- timer_ctx->irq.level);
+ timer_ctx->irq.level,
+ timer_ctx);
WARN_ON(ret);
}
}
@@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_timer_update_state(vcpu);
}
-int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *virt_irq,
- const struct kvm_irq_level *phys_irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/*
- * The vcpu timer irq number cannot be determined in
- * kvm_timer_vcpu_init() because it is called much before
- * kvm_vcpu_set_target(). To handle this, we determine
- * vcpu timer irq number when the vcpu is reset.
- */
- vtimer->irq.irq = virt_irq->irq;
- ptimer->irq.irq = phys_irq->irq;
-
- /*
* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
* and to 0 for ARMv7. We provide an implementation that always
* resets the timer to be disabled and unmasked and is compliant with
@@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
/* Synchronize cntvoff across all vtimers of a VM. */
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
@@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
INIT_WORK(&timer->expired, kvm_timer_inject_irq_work);
hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
timer->timer.function = kvm_timer_expire;
+
+ vtimer->irq.irq = default_vtimer_irq.irq;
+ ptimer->irq.irq = default_ptimer_irq.irq;
}
static void kvm_timer_init_interrupt(void *info)
@@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq);
}
+static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
+{
+ int vtimer_irq, ptimer_irq;
+ int i, ret;
+
+ vtimer_irq = vcpu_vtimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu));
+ if (ret)
+ return false;
+
+ ptimer_irq = vcpu_ptimer(vcpu)->irq.irq;
+ ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu));
+ if (ret)
+ return false;
+
+ kvm_for_each_vcpu(i, vcpu, vcpu->kvm) {
+ if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq ||
+ vcpu_ptimer(vcpu)->irq.irq != ptimer_irq)
+ return false;
+ }
+
+ return true;
+}
+
int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
@@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
if (!vgic_initialized(vcpu->kvm))
return -ENODEV;
+ if (!timer_irqs_are_valid(vcpu)) {
+ kvm_debug("incorrectly configured timer irqs\n");
+ return -EINVAL;
+ }
+
/*
* Find the physical IRQ number corresponding to the host_vtimer_irq
*/
@@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void)
val |= (CNTHCTL_EL1PCTEN << cnthctl_shift);
write_sysreg(val, cnthctl_el2);
}
+
+static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu_vtimer(vcpu)->irq.irq = vtimer_irq;
+ vcpu_ptimer(vcpu)->irq.irq = ptimer_irq;
+ }
+}
+
+int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
+ struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
+ int irq;
+
+ if (!irqchip_in_kernel(vcpu->kvm))
+ return -EINVAL;
+
+ if (get_user(irq, uaddr))
+ return -EFAULT;
+
+ if (!(irq_is_ppi(irq)))
+ return -EINVAL;
+
+ if (vcpu->arch.timer_cpu.enabled)
+ return -EBUSY;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ int __user *uaddr = (int __user *)(long)attr->addr;
+ struct arch_timer_context *timer;
+ int irq;
+
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ timer = vcpu_vtimer(vcpu);
+ break;
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ timer = vcpu_ptimer(vcpu);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ irq = timer->irq.irq;
+ return put_user(irq, uaddr);
+}
+
+int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
+ case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
+ return 0;
+ }
+
+ return -ENXIO;
+}
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 3417e184c8e1..a39a1e161e63 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
}
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.power_off = true;
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ kvm_vcpu_kick(vcpu);
+}
+
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
@@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
vcpu->arch.power_off = false;
break;
case KVM_MP_STATE_STOPPED:
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
break;
default:
return -EINVAL;
@@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
}
ret = kvm_timer_enable(vcpu);
+ if (ret)
+ return ret;
+
+ ret = kvm_arm_pmu_v3_enable(vcpu);
return ret;
}
@@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm)
kvm_for_each_vcpu(i, vcpu, kvm)
vcpu->arch.pause = true;
- kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT);
-}
-
-void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu)
-{
- vcpu->arch.pause = true;
- kvm_vcpu_kick(vcpu);
-}
-
-void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu)
-{
- struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
-
- vcpu->arch.pause = false;
- swake_up(wq);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP);
}
void kvm_arm_resume_guest(struct kvm *kvm)
@@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm)
int i;
struct kvm_vcpu *vcpu;
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_arm_resume_vcpu(vcpu);
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ vcpu->arch.pause = false;
+ swake_up(kvm_arch_vcpu_wq(vcpu));
+ }
}
-static void vcpu_sleep(struct kvm_vcpu *vcpu)
+static void vcpu_req_sleep(struct kvm_vcpu *vcpu)
{
struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu);
swait_event_interruptible(*wq, ((!vcpu->arch.power_off) &&
(!vcpu->arch.pause)));
+
+ if (vcpu->arch.power_off || vcpu->arch.pause) {
+ /* Awaken to handle a signal, request we sleep again later. */
+ kvm_make_request(KVM_REQ_SLEEP, vcpu);
+ }
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
@@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
return vcpu->arch.target >= 0;
}
+static void check_vcpu_requests(struct kvm_vcpu *vcpu)
+{
+ if (kvm_request_pending(vcpu)) {
+ if (kvm_check_request(KVM_REQ_SLEEP, vcpu))
+ vcpu_req_sleep(vcpu);
+
+ /*
+ * Clear IRQ_PENDING requests that were made to guarantee
+ * that a VCPU sees new virtual interrupts.
+ */
+ kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+ }
+}
+
/**
* kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code
* @vcpu: The VCPU pointer
@@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
update_vttbr(vcpu->kvm);
- if (vcpu->arch.power_off || vcpu->arch.pause)
- vcpu_sleep(vcpu);
+ check_vcpu_requests(vcpu);
/*
* Preparing the interrupts to be injected also
@@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
run->exit_reason = KVM_EXIT_INTR;
}
+ /*
+ * Ensure we set mode to IN_GUEST_MODE after we disable
+ * interrupts and before the final VCPU requests check.
+ * See the comment in kvm_vcpu_exiting_guest_mode() and
+ * Documentation/virtual/kvm/vcpu-requests.rst
+ */
+ smp_store_mb(vcpu->mode, IN_GUEST_MODE);
+
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) ||
- vcpu->arch.power_off || vcpu->arch.pause) {
+ kvm_request_pending(vcpu)) {
+ vcpu->mode = OUTSIDE_GUEST_MODE;
local_irq_enable();
kvm_pmu_sync_hwstate(vcpu);
kvm_timer_sync_hwstate(vcpu);
@@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
*/
trace_kvm_entry(*vcpu_pc(vcpu));
guest_enter_irqoff();
- vcpu->mode = IN_GUEST_MODE;
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
@@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level)
* trigger a world-switch round on the running physical CPU to set the
* virtual IRQ/FIQ fields in the HCR appropriately.
*/
+ kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return 0;
@@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL);
case KVM_ARM_IRQ_TYPE_SPI:
if (!irqchip_in_kernel(kvm))
return -ENXIO;
@@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
- return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
+ return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL);
}
return -EINVAL;
@@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
* Handle the "start in power-off" case.
*/
if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
- vcpu->arch.power_off = true;
+ vcpu_power_off(vcpu);
else
vcpu->arch.power_off = false;
@@ -1115,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy)
__cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr);
__cpu_init_stage2();
- if (is_kernel_in_hyp_mode())
- kvm_timer_init_vhe();
-
kvm_arm_init_debug();
}
@@ -1137,6 +1160,7 @@ static void cpu_hyp_reinit(void)
* event was cancelled before the CPU was reset.
*/
__cpu_init_stage2();
+ kvm_timer_init_vhe();
} else {
cpu_init_hyp_mode(NULL);
}
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 87940364570b..91728faa13fd 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -19,10 +19,12 @@
#include <linux/irqchip/arm-gic-v3.h>
#include <linux/kvm_host.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#define vtr_to_max_lr_idx(v) ((v) & 0xf)
#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1)
+#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5))
static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
{
@@ -118,6 +120,90 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
}
}
+static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP0R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP0R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP0R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP0R3_EL2);
+ break;
+ }
+}
+
+static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n)
+{
+ switch (n) {
+ case 0:
+ write_gicreg(val, ICH_AP1R0_EL2);
+ break;
+ case 1:
+ write_gicreg(val, ICH_AP1R1_EL2);
+ break;
+ case 2:
+ write_gicreg(val, ICH_AP1R2_EL2);
+ break;
+ case 3:
+ write_gicreg(val, ICH_AP1R3_EL2);
+ break;
+ }
+}
+
+static u32 __hyp_text __vgic_v3_read_ap0rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP0R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP0R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP0R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP0R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
+static u32 __hyp_text __vgic_v3_read_ap1rn(int n)
+{
+ u32 val;
+
+ switch (n) {
+ case 0:
+ val = read_gicreg(ICH_AP1R0_EL2);
+ break;
+ case 1:
+ val = read_gicreg(ICH_AP1R1_EL2);
+ break;
+ case 2:
+ val = read_gicreg(ICH_AP1R2_EL2);
+ break;
+ case 3:
+ val = read_gicreg(ICH_AP1R3_EL2);
+ break;
+ default:
+ unreachable();
+ }
+
+ return val;
+}
+
void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
@@ -154,24 +240,27 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
switch (nr_pre_bits) {
case 7:
- cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
- cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
+ cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3);
+ cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2);
case 6:
- cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
+ cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1);
default:
- cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
+ cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0);
}
switch (nr_pre_bits) {
case 7:
- cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
- cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
+ cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3);
+ cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2);
case 6:
- cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
+ cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1);
default:
- cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
+ cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
}
} else {
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ write_gicreg(0, ICH_HCR_EL2);
+
cpu_if->vgic_elrsr = 0xffff;
cpu_if->vgic_ap0r[0] = 0;
cpu_if->vgic_ap0r[1] = 0;
@@ -224,26 +313,34 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
switch (nr_pre_bits) {
case 7:
- write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
- write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2);
case 6:
- write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1);
default:
- write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
+ __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0);
}
switch (nr_pre_bits) {
case 7:
- write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
- write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2);
case 6:
- write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1);
default:
- write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
+ __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0);
}
for (i = 0; i < used_lrs; i++)
__gic_v3_set_lr(cpu_if->vgic_lr[i], i);
+ } else {
+ /*
+ * If we need to trap system registers, we must write
+ * ICH_HCR_EL2 anyway, even if no interrupts are being
+ * injected,
+ */
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
/*
@@ -287,3 +384,697 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+
+#ifdef CONFIG_ARM64
+
+static int __hyp_text __vgic_v3_bpr_min(void)
+{
+ /* See Pseudocode for VPriorityGroup */
+ return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2));
+}
+
+static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu)
+{
+ u32 esr = kvm_vcpu_get_hsr(vcpu);
+ u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT;
+
+ return crm != 8;
+}
+
+#define GICv3_IDLE_PRIORITY 0xff
+
+static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu,
+ u32 vmcr,
+ u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ u8 priority = GICv3_IDLE_PRIORITY;
+ int i, lr = -1;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+ u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* Not pending in the state? */
+ if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT)
+ continue;
+
+ /* Group-0 interrupt, but Group-0 disabled? */
+ if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK))
+ continue;
+
+ /* Group-1 interrupt, but Group-1 disabled? */
+ if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK))
+ continue;
+
+ /* Not the highest priority? */
+ if (lr_prio >= priority)
+ continue;
+
+ /* This is a candidate */
+ priority = lr_prio;
+ *lr_val = val;
+ lr = i;
+ }
+
+ if (lr == -1)
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+
+ return lr;
+}
+
+static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu,
+ int intid, u64 *lr_val)
+{
+ unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs;
+ int i;
+
+ for (i = 0; i < used_lrs; i++) {
+ u64 val = __gic_v3_get_lr(i);
+
+ if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid &&
+ (val & ICH_LR_ACTIVE_BIT)) {
+ *lr_val = val;
+ return i;
+ }
+ }
+
+ *lr_val = ICC_IAR1_EL1_SPURIOUS;
+ return -1;
+}
+
+static int __hyp_text __vgic_v3_get_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 val;
+
+ /*
+ * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers
+ * contain the active priority levels for this VCPU
+ * for the maximum number of supported priority
+ * levels, and we return the full priority level only
+ * if the BPR is programmed to its minimum, otherwise
+ * we return a combination of the priority level and
+ * subpriority, as determined by the setting of the
+ * BPR, but without the full subpriority.
+ */
+ val = __vgic_v3_read_ap0rn(i);
+ val |= __vgic_v3_read_ap1rn(i);
+ if (!val) {
+ hap += 32;
+ continue;
+ }
+
+ return (hap + __ffs(val)) << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr)
+{
+ return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+}
+
+static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr)
+{
+ unsigned int bpr;
+
+ if (vmcr & ICH_VMCR_CBPR_MASK) {
+ bpr = __vgic_v3_get_bpr0(vmcr);
+ if (bpr < 7)
+ bpr++;
+ } else {
+ bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ }
+
+ return bpr;
+}
+
+/*
+ * Convert a priority to a preemption level, taking the relevant BPR
+ * into account by zeroing the sub-priority bits.
+ */
+static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp)
+{
+ unsigned int bpr;
+
+ if (!grp)
+ bpr = __vgic_v3_get_bpr0(vmcr) + 1;
+ else
+ bpr = __vgic_v3_get_bpr1(vmcr);
+
+ return pri & (GENMASK(7, 0) << bpr);
+}
+
+/*
+ * The priority value is independent of any of the BPR values, so we
+ * normalize it using the minumal BPR value. This guarantees that no
+ * matter what the guest does with its BPR, we can always set/get the
+ * same value of a priority.
+ */
+static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp)
+{
+ u8 pre, ap;
+ u32 val;
+ int apr;
+
+ pre = __vgic_v3_pri_to_pre(pri, vmcr, grp);
+ ap = pre >> __vgic_v3_bpr_min();
+ apr = ap / 32;
+
+ if (!grp) {
+ val = __vgic_v3_read_ap0rn(apr);
+ __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr);
+ } else {
+ val = __vgic_v3_read_ap1rn(apr);
+ __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr);
+ }
+}
+
+static int __hyp_text __vgic_v3_clear_highest_active_priority(void)
+{
+ u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2));
+ u32 hap = 0;
+ int i;
+
+ for (i = 0; i < nr_apr_regs; i++) {
+ u32 ap0, ap1;
+ int c0, c1;
+
+ ap0 = __vgic_v3_read_ap0rn(i);
+ ap1 = __vgic_v3_read_ap1rn(i);
+ if (!ap0 && !ap1) {
+ hap += 32;
+ continue;
+ }
+
+ c0 = ap0 ? __ffs(ap0) : 32;
+ c1 = ap1 ? __ffs(ap1) : 32;
+
+ /* Always clear the LSB, which is the highest priority */
+ if (c0 < c1) {
+ ap0 &= ~BIT(c0);
+ __vgic_v3_write_ap0rn(ap0, i);
+ hap += c0;
+ } else {
+ ap1 &= ~BIT(c1);
+ __vgic_v3_write_ap1rn(ap1, i);
+ hap += c1;
+ }
+
+ /* Rescale to 8 bits of priority */
+ return hap << __vgic_v3_bpr_min();
+ }
+
+ return GICv3_IDLE_PRIORITY;
+}
+
+static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 lr_val;
+ u8 lr_prio, pmr;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val);
+ if (lr < 0)
+ goto spurious;
+
+ if (grp != !!(lr_val & ICH_LR_GROUP))
+ goto spurious;
+
+ pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+ if (pmr <= lr_prio)
+ goto spurious;
+
+ if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp))
+ goto spurious;
+
+ lr_val &= ~ICH_LR_STATE;
+ /* No active state for LPIs */
+ if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI)
+ lr_val |= ICH_LR_ACTIVE_BIT;
+ __gic_v3_set_lr(lr_val, lr);
+ __vgic_v3_set_active_priority(lr_prio, vmcr, grp);
+ vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK);
+ return;
+
+spurious:
+ vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS);
+}
+
+static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val)
+{
+ lr_val &= ~ICH_LR_ACTIVE_BIT;
+ if (lr_val & ICH_LR_HW) {
+ u32 pid;
+
+ pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT;
+ gic_write_dir(pid);
+ }
+
+ __gic_v3_set_lr(lr_val, lr);
+}
+
+static void __hyp_text __vgic_v3_bump_eoicount(void)
+{
+ u32 hcr;
+
+ hcr = read_gicreg(ICH_HCR_EL2);
+ hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+ write_gicreg(hcr, ICH_HCR_EL2);
+}
+
+static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu,
+ u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ int lr;
+
+ /* EOImode == 0, nothing to be done here */
+ if (!(vmcr & ICH_VMCR_EOIM_MASK))
+ return;
+
+ /* No deactivate to be performed on an LPI */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u32 vid = vcpu_get_reg(vcpu, rt);
+ u64 lr_val;
+ u8 lr_prio, act_prio;
+ int lr, grp;
+
+ grp = __vgic_v3_get_group(vcpu);
+
+ /* Drop priority in any case */
+ act_prio = __vgic_v3_clear_highest_active_priority();
+
+ /* If EOIing an LPI, no deactivate to be performed */
+ if (vid >= VGIC_MIN_LPI)
+ return;
+
+ /* EOImode == 1, nothing to be done here */
+ if (vmcr & ICH_VMCR_EOIM_MASK)
+ return;
+
+ lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val);
+ if (lr == -1) {
+ __vgic_v3_bump_eoicount();
+ return;
+ }
+
+ lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT;
+
+ /* If priorities or group do not match, the guest has fscked-up. */
+ if (grp != !!(lr_val & ICH_LR_GROUP) ||
+ __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio)
+ return;
+
+ /* Let's now perform the deactivation */
+ __vgic_v3_clear_active_lr(lr, lr_val);
+}
+
+static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK));
+}
+
+static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK));
+}
+
+static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG0_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG0_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+
+ if (val & 1)
+ vmcr |= ICH_VMCR_ENG1_MASK;
+ else
+ vmcr &= ~ICH_VMCR_ENG1_MASK;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr));
+}
+
+static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr));
+}
+
+static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bpr_min() - 1;
+
+ /* Enforce BPR limiting */
+ if (val < bpr_min)
+ val = bpr_min;
+
+ val <<= ICH_VMCR_BPR0_SHIFT;
+ val &= ICH_VMCR_BPR0_MASK;
+ vmcr &= ~ICH_VMCR_BPR0_MASK;
+ vmcr |= val;
+
+ __vgic_v3_write_vmcr(vmcr);
+}
+
+static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt)
+{
+ u64 val = vcpu_get_reg(vcpu, rt);
+ u8 bpr_min = __vgic_v3_bp