diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2015-04-07 18:09:20 +0200 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2015-04-07 18:09:20 +0200 |
commit | bf0fb67cf957fc8ecfaaa2819b7d6a0f795e2ef2 (patch) | |
tree | 22697f7deae781dbbacd2e19a5030df2e8551e6a /virt | |
parent | 8999602d08a804ae9cb271fdd5378f910058112d (diff) | |
parent | d44758c0dfc5993a4b9952935a7eae4c91ebb6b4 (diff) |
Merge tag 'kvm-arm-for-4.1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into 'kvm-next'
KVM/ARM changes for v4.1:
- fixes for live migration
- irqfd support
- kvm-io-bus & vgic rework to enable ioeventfd
- page ageing for stage-2 translation
- various cleanups
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/arm/arch_timer.c | 45 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v2-emul.c | 71 | ||||
-rw-r--r-- | virt/kvm/arm/vgic-v3-emul.c | 246 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.c | 479 | ||||
-rw-r--r-- | virt/kvm/arm/vgic.h | 37 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 7 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 9 | ||||
-rw-r--r-- | virt/kvm/iodev.h | 70 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 34 |
9 files changed, 609 insertions, 389 deletions
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6e54f3542126..98c95f2fcba4 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Work function for handling the backup timer that we schedule when a vcpu is + * no longer running, but had a timer programmed to fire in the future. + */ static void kvm_timer_inject_irq_work(struct work_struct *work) { struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); + + /* + * If the vcpu is blocked we want to wake it up so that it will see + * the timer has expired when entering the guest. + */ + kvm_vcpu_kick(vcpu); } static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) @@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return false; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + return cval <= now; +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer @@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * populate the CPU timer again. */ timer_disarm(timer); + + /* + * If the timer expired while we were not scheduled, now is the time + * to inject it. + */ + if (kvm_timer_should_fire(vcpu)) + kvm_timer_inject_irq(vcpu); } /** @@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - BUG_ON(timer_is_armed(timer)); - if (cval <= now) { + if (kvm_timer_should_fire(vcpu)) { /* * Timer has already expired while we were not * looking. Inject the interrupt and carry on. @@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, &timecounter->frac); timer_arm(timer, ns); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 19c6210f02cf..13907970d11c 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id); } +static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { .base = GIC_DIST_ACTIVE_SET, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_set_active_reg, }, { .base = GIC_DIST_ACTIVE_CLEAR, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_clear_active_reg, }, { .base = GIC_DIST_PRI, @@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + KVM_VGIC_V2_DIST_SIZE, + vgic_dist_ranges, -1, &dist->dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. @@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm, ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); - goto out; + goto out_unregister; } - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + goto out_unregister; } - kvm->arch.vgic.ready = true; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + out: if (ret) kvm_vgic_destroy(kvm); @@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; @@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; struct vgic_dist *vgic; struct kvm_exit_mmio mmio; + u32 data; offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> @@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mmio.len = 4; mmio.is_write = is_write; + mmio.data = &data; if (is_write) mmio_data_write(&mmio, ~0, *reg); switch (attr->group) { @@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f154631515..e9c3a7a83833 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len = 0x04, @@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { {}, }; +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +#define SGI_base(x) ((x) + SZ_64K) + +static const struct vgic_io_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, + { + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, { - .base = GICR_IGROUPR0, + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + { + .base = SGI_base(GICR_IGROUPR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_rao_wi, }, { - .base = GICR_ISENABLER0, + .base = SGI_base(GICR_ISENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg_redist, }, { - .base = GICR_ICENABLER0, + .base = SGI_base(GICR_ICENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg_redist, }, { - .base = GICR_ISPENDR0, + .base = SGI_base(GICR_ISPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg_redist, }, { - .base = GICR_ICPENDR0, + .base = SGI_base(GICR_ICPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg_redist, }, { - .base = GICR_ISACTIVER0, + .base = SGI_base(GICR_ISACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_ICACTIVER0, + .base = SGI_base(GICR_ICACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_IPRIORITYR0, + .base = SGI_base(GICR_IPRIORITYR0), .len = 0x20, .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg_redist, }, { - .base = GICR_ICFGR0, + .base = SGI_base(GICR_ICFGR0), .len = 0x08, .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg_redist, }, { - .base = GICR_IGRPMODR0, + .base = SGI_base(GICR_IGRPMODR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_NSACR, + .base = SGI_base(GICR_NSACR), .len = 0x04, .handle_mmio = handle_mmio_raz_wi, }, {}, }; -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static const struct kvm_mmio_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ -#define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long dbase = dist->vgic_dist_base; - unsigned long rdbase = dist->vgic_redist_base; - int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); - int vcpu_id; - const struct kvm_mmio_range *mmio_range; - - if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - - if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); -} - static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { if (vgic_queue_irq(vcpu, 0, irq)) { @@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t rdbase = dist->vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm->arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, &dist->dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out_unregister; + } + + for (i = 0; i < dist->nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_128K, vgic_redist_ranges, + i, &iodevs[i]); + if (ret) + goto out_unregister; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist->redist_iodevs = iodevs; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + if (iodevs) { + for (i = 0; i < dist->nr_cpus; i++) { + if (iodevs[i].dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &iodevs[i].dev); + } + } + out: if (ret) kvm_vgic_destroy(kvm); @@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist->vm_ops.init_model = vgic_v3_init_model; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c9f60f524588..8d550ff14700 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -31,6 +31,9 @@ #include <asm/kvm_emulate.h> #include <asm/kvm_arm.h> #include <asm/kvm_mmu.h> +#include <trace/events/kvm.h> +#include <asm/kvm.h> +#include <kvm/iodev.h> /* * How the whole thing works (courtesy of Christoffer Dall): @@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, } /** - * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs * - * Move any pending IRQs that have already been assigned to LRs back to the + * Move any IRQs that have already been assigned to LRs back to the * emulated distributor state so that the complete emulated state can be read * from the main emulation structures without investigating the LRs. - * - * Note that IRQs in the active state in the LRs get their pending state moved - * to the distributor but the active state stays in the LRs, because we don't - * track the active state on the distributor side. */ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { @@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * 01: pending * 10: active * 11: pending and active - * - * If the LR holds only an active interrupt (not pending) then - * just leave it alone. */ - if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) - continue; + BUG_ON(!(lr.state & LR_STATE_MASK)); + + /* Reestablish SGI source for pending and active IRQs */ + if (lr.irq < VGIC_NR_SGIS) + add_sgi_source(vcpu, lr.irq, lr.source); + + /* + * If the LR holds an active (10) or a pending and active (11) + * interrupt then move the active state to the + * distributor tracking bit. + */ + if (lr.state & LR_STATE_ACTIVE) { + vgic_irq_set_active(vcpu, lr.irq); + lr.state &= ~LR_STATE_ACTIVE; + } /* * Reestablish the pending state on the distributor and the @@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set_pending(vcpu, lr.irq); - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - lr.state &= ~LR_STATE_PENDING; + if (lr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, lr.irq); + lr.state &= ~LR_STATE_PENDING; + } + vgic_set_lr(vcpu, i, lr); /* - * If there's no state left on the LR (it could still be - * active), then the LR does not hold any useful info and can - * be marked as free for other use. + * Mark the LR as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) { - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); - } + BUG_ON(lr.state & LR_STATE_MASK); + vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - const struct kvm_mmio_range *r = ranges; - - while (r->len) { - if (offset >= r->base && - (offset + mmio->len) <= (r->base + r->len)) - return r; - r++; +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + int len, gpa_t offset) +{ + while (ranges->len) { + if (offset >= ranges->base && + (offset + len) <= (ranges->base + ranges->len)) + return ranges; + ranges++; } return NULL; } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct kvm_mmio_range *range, + const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct kvm_mmio_range *range) + const struct vgic_io_range *range) { - u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; bool ret; @@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[1]; + mmio32.data = &((u32 *)mmio->data)[1]; ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - if (!mmio->is_write) - data32[1] = *(u32 *)mmio32.data; mmio32.phys_addr = mmio->phys_addr; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[0]; + mmio32.data = &((u32 *)mmio->data)[0]; ret |= range->handle_mmio(vcpu, &mmio32, offset); - if (!mmio->is_write) - data32[0] = *(u32 *)mmio32.data; return ret; } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * @ranges: array of MMIO ranges in a given region - * @mmio_base: base address of that region + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access * * returns true if the MMIO access could be performed */ -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, - unsigned long mmio_base) +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) { - const struct kvm_mmio_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu->run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; bool updated_state; - unsigned long offset; + gpa_t offset; - offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + offset = addr - iodev->addr; + range = vgic_find_range(iodev->reg_ranges, len, offset); if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; + pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); + return -ENXIO; } - spin_lock(&vcpu->kvm->arch.vgic.lock); + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + mmio.data = val; + mmio.private = iodev->redist_vcpu; + + spin_lock(&dist->lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, mmio, offset, range); + updated_state = call_range_handler(vcpu, &mmio, offset, range); } else { - if (!mmio->is_write) - memset(mmio->data, 0, mmio->len); + if (!is_write) + memset(val, 0, len); updated_state = false; } - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); + spin_unlock(&dist->lock); + run->mmio.is_write = is_write; + run->mmio.len = len; + run->mmio.phys_addr = addr; + memcpy(run->mmio.data, val, len); + kvm_handle_mmio_return(vcpu, run); if (updated_state) vgic_kick_vcpus(vcpu->kvm); - return true; + return 0; +} + +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); } +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + /** - * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access + * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus + * @kvm: The VM structure pointer + * @base: The (guest) base address for the register frame + * @len: Length of the register frame window + * @ranges: Describing the handler functions for each register + * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call + * @iodev: Points to memory to be passed on to the handler * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - * Calls the actual handling routine for the selected VGIC model. + * @iodev stores the parameters of this function to be usable by the handler + * respectively the dispatcher function (since the KVM I/O bus framework lacks + * an opaque parameter). Initialization is done in this function, but the + * reference should be valid and unique for the whole VGIC lifetime. + * If the register frame is not mapped for a specific VCPU, pass -1 to + * @redist_vcpu_id. */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_vcpu_id, + struct vgic_io_device *iodev) { - if (!irqchip_in_kernel(vcpu->kvm)) - return false; + struct kvm_vcpu *vcpu = NULL; + int ret; - /* - * This will currently call either vgic_v2_handle_mmio() or - * vgic_v3_handle_mmio(), which in turn will call - * vgic_handle_mmio_range() defined above. - */ - return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); + if (redist_vcpu_id >= 0) + vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); + + iodev->addr = base; + iodev->len = len; + iodev->reg_ranges = ranges; + iodev->redist_vcpu = vcpu; + + kvm_iodevice_init(&iodev->dev, &vgic_io_ops); + + mutex_lock(&kvm->slots_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, + &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + /* Mark the iodev as invalid if registration fails. */ + if (ret) + iodev->dev.ops = NULL; + + return ret; } static int vgic_nr_shared_irqs(struct vgic_dist *dist) @@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist) return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; } +static int compute_active_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *active, *enabled, *act_percpu, *act_shared; + unsigned long active_private, active_shared; + int nr_shared = vgic_nr_shared_irqs(dist); + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + act_percpu = vcpu->arch.vgic_cpu.active_percpu; + act_shared = vcpu->arch.vgic_cpu.active_shared; < |