From 45ec431c52f89218ac01d87663e54de30458edf0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 23 Aug 2009 17:46:21 +0300 Subject: KVM: Don't wrap schedule() with vcpu_put()/vcpu_load() Preemption notifiers will do that for us automatically. Signed-off-by: Avi Kivity --- virt/kvm/kvm_main.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7495ce347344..22b520b54411 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1689,9 +1689,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) if (signal_pending(current)) break; - vcpu_put(vcpu); schedule(); - vcpu_load(vcpu); } finish_wait(&vcpu->wq, &wait); -- cgit v1.2.3 From 1a6e4a8c276e122dbeb6f9c610f29735e4236bfd Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:19 +0300 Subject: KVM: Move irq sharing information to irqchip level This removes assumptions that max GSIs is smaller than number of pins. Sharing is tracked on pin level not GSI level. [avi: no PIC on ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/ioapic.h | 1 + virt/kvm/irq_comm.c | 59 ++++++++++++++++++++++++++++++++--------------------- 2 files changed, 37 insertions(+), 23 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 7080b713c160..6e461ade6365 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -41,6 +41,7 @@ struct kvm_ioapic { u32 irr; u32 pad; union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS]; + unsigned long irq_states[IOAPIC_NUM_PINS]; struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 001663ff401a..9783f5c43dae 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -31,20 +31,39 @@ #include "ioapic.h" +static inline int kvm_irq_line_state(unsigned long *irq_state, + int irq_source_id, int level) +{ + /* Logical OR for level trig interrupt */ + if (level) + set_bit(irq_source_id, irq_state); + else + clear_bit(irq_source_id, irq_state); + + return !!(*irq_state); +} + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { #ifdef CONFIG_X86 - return kvm_pic_set_irq(pic_irqchip(kvm), e->irqchip.pin, level); + struct kvm_pic *pic = pic_irqchip(kvm); + level = kvm_irq_line_state(&pic->irq_states[e->irqchip.pin], + irq_source_id, level); + return kvm_pic_set_irq(pic, e->irqchip.pin, level); #else return -1; #endif } static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { - return kvm_ioapic_set_irq(kvm->arch.vioapic, e->irqchip.pin, level); + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + level = kvm_irq_line_state(&ioapic->irq_states[e->irqchip.pin], + irq_source_id, level); + + return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, level); } inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq) @@ -96,10 +115,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, } static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int level) + struct kvm *kvm, int irq_source_id, int level) { struct kvm_lapic_irq irq; + if (!level) + return -1; + trace_kvm_msi_set_irq(e->msi.address_lo, e->msi.data); irq.dest_id = (e->msi.address_lo & @@ -125,34 +147,19 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) { struct kvm_kernel_irq_routing_entry *e; - unsigned long *irq_state, sig_level; int ret = -1; trace_kvm_set_irq(irq, level, irq_source_id); WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - if (irq < KVM_IOAPIC_NUM_PINS) { - irq_state = (unsigned long *)&kvm->arch.irq_states[irq]; - - /* Logical OR for level trig interrupt */ - if (level) - set_bit(irq_source_id, irq_state); - else - clear_bit(irq_source_id, irq_state); - sig_level = !!(*irq_state); - } else if (!level) - return ret; - else /* Deal with MSI/MSI-X */ - sig_level = 1; - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ list_for_each_entry(e, &kvm->irq_routing, link) if (e->gsi == irq) { - int r = e->set(e, kvm, sig_level); + int r = e->set(e, kvm, irq_source_id, level); if (r < 0) continue; @@ -232,8 +239,14 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id) printk(KERN_ERR "kvm: IRQ source ID out of range!\n"); return; } - for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) - clear_bit(irq_source_id, &kvm->arch.irq_states[i]); + for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) { + clear_bit(irq_source_id, &kvm->arch.vioapic->irq_states[i]); + if (i >= 16) + continue; +#ifdef CONFIG_X86 + clear_bit(irq_source_id, &pic_irqchip(kvm)->irq_states[i]); +#endif + } clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap); mutex_unlock(&kvm->irq_lock); } -- cgit v1.2.3 From 46e624b95c36d729bdf24010fff11d16f6fe94fa Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:20 +0300 Subject: KVM: Change irq routing table to use gsi indexed array Use gsi indexed array instead of scanning all entries on each interrupt injection. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 88 ++++++++++++++++++++++++++++++++--------------------- virt/kvm/kvm_main.c | 1 - 2 files changed, 53 insertions(+), 36 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 9783f5c43dae..81950f6f6fd9 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -144,10 +144,12 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, * = 0 Interrupt was coalesced (previous irq is still pending) * > 0 Number of CPUs interrupt was delivered to */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) +int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) { struct kvm_kernel_irq_routing_entry *e; int ret = -1; + struct kvm_irq_routing_table *irq_rt; + struct hlist_node *n; trace_kvm_set_irq(irq, level, irq_source_id); @@ -157,8 +159,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - list_for_each_entry(e, &kvm->irq_routing, link) - if (e->gsi == irq) { + irq_rt = kvm->irq_routing; + if (irq < irq_rt->nr_rt_entries) + hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { int r = e->set(e, kvm, irq_source_id, level); if (r < 0) continue; @@ -170,20 +173,23 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, int irq, int level) void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { - struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_ack_notifier *kian; struct hlist_node *n; unsigned gsi = pin; + int i; trace_kvm_ack_irq(irqchip, pin); - list_for_each_entry(e, &kvm->irq_routing, link) + for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) { + struct kvm_kernel_irq_routing_entry *e; + e = &kvm->irq_routing->rt_entries[i]; if (e->type == KVM_IRQ_ROUTING_IRQCHIP && e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; } + } hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) if (kian->gsi == gsi) @@ -280,26 +286,30 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) kimn->func(kimn, mask); } -static void __kvm_free_irq_routing(struct list_head *irq_routing) -{ - struct kvm_kernel_irq_routing_entry *e, *n; - - list_for_each_entry_safe(e, n, irq_routing, link) - kfree(e); -} - void kvm_free_irq_routing(struct kvm *kvm) { mutex_lock(&kvm->irq_lock); - __kvm_free_irq_routing(&kvm->irq_routing); + kfree(kvm->irq_routing); mutex_unlock(&kvm->irq_lock); } -static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, +static int setup_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; + struct kvm_kernel_irq_routing_entry *ei; + struct hlist_node *n; + + /* + * Do not allow GSI to be mapped to the same irqchip more than once. + * Allow only one to one mapping between GSI and MSI. + */ + hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) + if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->u.irqchip.irqchip == ei->irqchip.irqchip) + return r; e->gsi = ue->gsi; e->type = ue->type; @@ -332,6 +342,8 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, default: goto out; } + + hlist_add_head(&e->link, &rt->map[e->gsi]); r = 0; out: return r; @@ -343,43 +355,49 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned nr, unsigned flags) { - struct list_head irq_list = LIST_HEAD_INIT(irq_list); - struct list_head tmp = LIST_HEAD_INIT(tmp); - struct kvm_kernel_irq_routing_entry *e = NULL; - unsigned i; + struct kvm_irq_routing_table *new, *old; + u32 i, nr_rt_entries = 0; int r; + for (i = 0; i < nr; ++i) { + if (ue[i].gsi >= KVM_MAX_IRQ_ROUTES) + return -EINVAL; + nr_rt_entries = max(nr_rt_entries, ue[i].gsi); + } + + nr_rt_entries += 1; + + new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) + + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), + GFP_KERNEL); + + if (!new) + return -ENOMEM; + + new->rt_entries = (void *)&new->map[nr_rt_entries]; + + new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < nr; ++i) { r = -EINVAL; - if (ue->gsi >= KVM_MAX_IRQ_ROUTES) - goto out; if (ue->flags) goto out; - r = -ENOMEM; - e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - goto out; - r = setup_routing_entry(e, ue); + r = setup_routing_entry(new, &new->rt_entries[i], ue); if (r) goto out; ++ue; - list_add(&e->link, &irq_list); - e = NULL; } mutex_lock(&kvm->irq_lock); - list_splice(&kvm->irq_routing, &tmp); - INIT_LIST_HEAD(&kvm->irq_routing); - list_splice(&irq_list, &kvm->irq_routing); - INIT_LIST_HEAD(&irq_list); - list_splice(&tmp, &irq_list); + old = kvm->irq_routing; + kvm->irq_routing = new; mutex_unlock(&kvm->irq_lock); + new = old; r = 0; out: - kfree(e); - __kvm_free_irq_routing(&irq_list); + kfree(new); return r; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 22b520b54411..3bee94892774 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -957,7 +957,6 @@ static struct kvm *kvm_create_vm(void) if (IS_ERR(kvm)) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP - INIT_LIST_HEAD(&kvm->irq_routing); INIT_HLIST_HEAD(&kvm->mask_notifier_list); #endif -- cgit v1.2.3 From 3e71f88bc90792a187703860cf22fbed7c12cbd9 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:21 +0300 Subject: KVM: Maintain back mapping from irqchip/pin to gsi Maintain back mapping from irqchip/pin to gsi to speedup interrupt acknowledgment notifications. [avi: build fix on non-x86/ia64] Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 81950f6f6fd9..59cf8dae0062 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -175,25 +175,16 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) { struct kvm_irq_ack_notifier *kian; struct hlist_node *n; - unsigned gsi = pin; - int i; + int gsi; trace_kvm_ack_irq(irqchip, pin); - for (i = 0; i < kvm->irq_routing->nr_rt_entries; i++) { - struct kvm_kernel_irq_routing_entry *e; - e = &kvm->irq_routing->rt_entries[i]; - if (e->type == KVM_IRQ_ROUTING_IRQCHIP && - e->irqchip.irqchip == irqchip && - e->irqchip.pin == pin) { - gsi = e->gsi; - break; - } - } - - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + gsi = kvm->irq_routing->chip[irqchip][pin]; + if (gsi != -1) + hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); } void kvm_register_irq_ack_notifier(struct kvm *kvm, @@ -332,6 +323,9 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, } e->irqchip.irqchip = ue->u.irqchip.irqchip; e->irqchip.pin = ue->u.irqchip.pin + delta; + if (e->irqchip.pin >= KVM_IOAPIC_NUM_PINS) + goto out; + rt->chip[ue->u.irqchip.irqchip][e->irqchip.pin] = ue->gsi; break; case KVM_IRQ_ROUTING_MSI: e->set = kvm_set_msi; @@ -356,7 +350,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; - u32 i, nr_rt_entries = 0; + u32 i, j, nr_rt_entries = 0; int r; for (i = 0; i < nr; ++i) { @@ -377,6 +371,9 @@ int kvm_set_irq_routing(struct kvm *kvm, new->rt_entries = (void *)&new->map[nr_rt_entries]; new->nr_rt_entries = nr_rt_entries; + for (i = 0; i < 3; i++) + for (j = 0; j < KVM_IOAPIC_NUM_PINS; j++) + new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { r = -EINVAL; -- cgit v1.2.3 From e42bba90bfe724e32fc66ab1b6cb283c51d064b1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:22 +0300 Subject: KVM: Move irq routing data structure to rcu locking Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 59cf8dae0062..fb861dd956fc 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -159,7 +159,8 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. */ - irq_rt = kvm->irq_routing; + rcu_read_lock(); + irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { int r = e->set(e, kvm, irq_source_id, level); @@ -168,6 +169,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) ret = r + ((ret < 0) ? 0 : ret); } + rcu_read_unlock(); return ret; } @@ -179,7 +181,10 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) trace_kvm_ack_irq(irqchip, pin); - gsi = kvm->irq_routing->chip[irqchip][pin]; + rcu_read_lock(); + gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; + rcu_read_unlock(); + if (gsi != -1) hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, link) @@ -279,9 +284,9 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) void kvm_free_irq_routing(struct kvm *kvm) { - mutex_lock(&kvm->irq_lock); + /* Called only during vm destruction. Nobody can use the pointer + at this stage */ kfree(kvm->irq_routing); - mutex_unlock(&kvm->irq_lock); } static int setup_routing_entry(struct kvm_irq_routing_table *rt, @@ -387,8 +392,9 @@ int kvm_set_irq_routing(struct kvm *kvm, mutex_lock(&kvm->irq_lock); old = kvm->irq_routing; - kvm->irq_routing = new; + rcu_assign_pointer(kvm->irq_routing, new); mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); new = old; r = 0; -- cgit v1.2.3 From 136bdfeee7b5bc986fc94af3a40d7d13ea37bb95 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:23 +0300 Subject: KVM: Move irq ack notifier list to arch independent code Mask irq notifier list is already there. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 5 ++--- virt/kvm/kvm_main.c | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index fb861dd956fc..f01972595938 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -186,8 +186,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_unlock(); if (gsi != -1) - hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list, - link) + hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link) if (kian->gsi == gsi) kian->irq_acked(kian); } @@ -196,7 +195,7 @@ void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(&kvm->irq_lock); - hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list); + hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3bee94892774..6eca153e1a02 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -958,6 +958,7 @@ static struct kvm *kvm_create_vm(void) goto out; #ifdef CONFIG_HAVE_KVM_IRQCHIP INIT_HLIST_HEAD(&kvm->mask_notifier_list); + INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list); #endif #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET -- cgit v1.2.3 From 280aa177dcd1edc718d8a92f17f235b783ec6307 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:24 +0300 Subject: KVM: Convert irq notifiers lists to RCU locking Use RCU locking for mask/ack notifiers lists. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/irq_comm.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index f01972595938..6c946141dbcc 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -183,19 +183,19 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) rcu_read_lock(); gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin]; - rcu_read_unlock(); - if (gsi != -1) - hlist_for_each_entry(kian, n, &kvm->irq_ack_notifier_list, link) + hlist_for_each_entry_rcu(kian, n, &kvm->irq_ack_notifier_list, + link) if (kian->gsi == gsi) kian->irq_acked(kian); + rcu_read_unlock(); } void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(&kvm->irq_lock); - hlist_add_head(&kian->link, &kvm->irq_ack_notifier_list); + hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list); mutex_unlock(&kvm->irq_lock); } @@ -203,8 +203,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian) { mutex_lock(&kvm->irq_lock); - hlist_del_init(&kian->link); + hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); } int kvm_request_irq_source_id(struct kvm *kvm) @@ -257,7 +258,7 @@ void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, { mutex_lock(&kvm->irq_lock); kimn->irq = irq; - hlist_add_head(&kimn->link, &kvm->mask_notifier_list); + hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list); mutex_unlock(&kvm->irq_lock); } @@ -265,8 +266,9 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, struct kvm_irq_mask_notifier *kimn) { mutex_lock(&kvm->irq_lock); - hlist_del(&kimn->link); + hlist_del_rcu(&kimn->link); mutex_unlock(&kvm->irq_lock); + synchronize_rcu(); } void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) @@ -274,11 +276,11 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, int irq, bool mask) struct kvm_irq_mask_notifier *kimn; struct hlist_node *n; - WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - - hlist_for_each_entry(kimn, n, &kvm->mask_notifier_list, link) + rcu_read_lock(); + hlist_for_each_entry_rcu(kimn, n, &kvm->mask_notifier_list, link) if (kimn->irq == irq) kimn->func(kimn, mask); + rcu_read_unlock(); } void kvm_free_irq_routing(struct kvm *kvm) -- cgit v1.2.3 From eba0226bdfffe262e72b8360e4d0d12070e9a0f0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:25 +0300 Subject: KVM: Move IO APIC to its own lock The allows removal of irq_lock from the injection path. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/ioapic.c | 80 ++++++++++++++++++++++++++++++++++++++++------------- virt/kvm/ioapic.h | 4 +++ virt/kvm/irq_comm.c | 23 +++++++++------ 3 files changed, 79 insertions(+), 28 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 9fe140bb38ec..38a2d20b89de 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -182,6 +182,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) union kvm_ioapic_redirect_entry entry; int ret = 1; + mutex_lock(&ioapic->lock); if (irq >= 0 && irq < IOAPIC_NUM_PINS) { entry = ioapic->redirtbl[irq]; level ^= entry.fields.polarity; @@ -198,34 +199,51 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level) } trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0); } + mutex_unlock(&ioapic->lock); + return ret; } -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin, - int trigger_mode) +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector, + int trigger_mode) { - union kvm_ioapic_redirect_entry *ent; + int i; + + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; - ent = &ioapic->redirtbl[pin]; + if (ent->fields.vector != vector) + continue; - kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); + /* + * We are dropping lock while calling ack notifiers because ack + * notifier callbacks for assigned devices call into IOAPIC + * recursively. Since remote_irr is cleared only after call + * to notifiers if the same vector will be delivered while lock + * is dropped it will be put into irr and will be delivered + * after ack notifier returns. + */ + mutex_unlock(&ioapic->lock); + kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i); + mutex_lock(&ioapic->lock); + + if (trigger_mode != IOAPIC_LEVEL_TRIG) + continue; - if (trigger_mode == IOAPIC_LEVEL_TRIG) { ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); ent->fields.remote_irr = 0; - if (!ent->fields.mask && (ioapic->irr & (1 << pin))) - ioapic_service(ioapic, pin); + if (!ent->fields.mask && (ioapic->irr & (1 << i))) + ioapic_service(ioapic, i); } } void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - int i; - for (i = 0; i < IOAPIC_NUM_PINS; i++) - if (ioapic->redirtbl[i].fields.vector == vector) - __kvm_ioapic_update_eoi(ioapic, i, trigger_mode); + mutex_lock(&ioapic->lock); + __kvm_ioapic_update_eoi(ioapic, vector, trigger_mode); + mutex_unlock(&ioapic->lock); } static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) @@ -250,8 +268,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, ioapic_debug("addr %lx\n", (unsigned long)addr); ASSERT(!(addr & 0xf)); /* check alignment */ - mutex_lock(&ioapic->kvm->irq_lock); addr &= 0xff; + mutex_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: result = ioapic->ioregsel; @@ -265,6 +283,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, result = 0; break; } + mutex_unlock(&ioapic->lock); + switch (len) { case 8: *(u64 *) val = result; @@ -277,7 +297,6 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, default: printk(KERN_WARNING "ioapic: wrong length %d\n", len); } - mutex_unlock(&ioapic->kvm->irq_lock); return 0; } @@ -293,15 +312,15 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ - mutex_lock(&ioapic->kvm->irq_lock); if (len == 4 || len == 8) data = *(u32 *) val; else { printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); - goto unlock; + return 0; } addr &= 0xff; + mutex_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: ioapic->ioregsel = data; @@ -312,15 +331,14 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, break; #ifdef CONFIG_IA64 case IOAPIC_REG_EOI: - kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG); + __kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG); break; #endif default: break; } -unlock: - mutex_unlock(&ioapic->kvm->irq_lock); + mutex_unlock(&ioapic->lock); return 0; } @@ -349,6 +367,7 @@ int kvm_ioapic_init(struct kvm *kvm) ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL); if (!ioapic) return -ENOMEM; + mutex_init(&ioapic->lock); kvm->arch.vioapic = ioapic; kvm_ioapic_reset(ioapic); kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); @@ -360,3 +379,26 @@ int kvm_ioapic_init(struct kvm *kvm) return ret; } +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + mutex_lock(&ioapic->lock); + memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&ioapic->lock); + return 0; +} + +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) +{ + struct kvm_ioapic *ioapic = ioapic_irqchip(kvm); + if (!ioapic) + return -EINVAL; + + mutex_lock(&ioapic->lock); + memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); + mutex_unlock(&ioapic->lock); + return 0; +} diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 6e461ade6365..419c43b667ab 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -45,6 +45,7 @@ struct kvm_ioapic { struct kvm_io_device dev; struct kvm *kvm; void (*ack_notifier)(void *opaque, int irq); + struct mutex lock; }; #ifdef DEBUG @@ -74,4 +75,7 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level); void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq); +int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); +int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); + #endif diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 6c946141dbcc..fadf4408a820 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -146,8 +146,8 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, */ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) { - struct kvm_kernel_irq_routing_entry *e; - int ret = -1; + struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; + int ret = -1, i = 0; struct kvm_irq_routing_table *irq_rt; struct hlist_node *n; @@ -162,14 +162,19 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) rcu_read_lock(); irq_rt = rcu_dereference(kvm->irq_routing); if (irq < irq_rt->nr_rt_entries) - hlist_for_each_entry(e, n, &irq_rt->map[irq], link) { - int r = e->set(e, kvm, irq_source_id, level); - if (r < 0) - continue; - - ret = r + ((ret < 0) ? 0 : ret); - } + hlist_for_each_entry(e, n, &irq_rt->map[irq], link) + irq_set[i++] = *e; rcu_read_unlock(); + + while(i--) { + int r; + r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level); + if (r < 0) + continue; + + ret = r + ((ret < 0) ? 0 : ret); + } + return ret; } -- cgit v1.2.3 From 680b3648ba89c44ac8d0316f78a0d6e147b88809 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 24 Aug 2009 11:54:26 +0300 Subject: KVM: Drop kvm->irq_lock lock from irq injection path The only thing it protects now is interrupt injection into lapic and this can work lockless. Even now with kvm->irq_lock in place access to lapic is not entirely serialized since vcpu access doesn't take kvm->irq_lock. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- virt/kvm/eventfd.c | 2 -- virt/kvm/irq_comm.c | 6 +----- virt/kvm/kvm_main.c | 2 -- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index bb4ebd89b9ff..30f70fd511c4 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -61,10 +61,8 @@ irqfd_inject(struct work_struct *work) struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); struct kvm *kvm = irqfd->kvm; - mutex_lock(&kvm->irq_lock); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1); kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0); - mutex_unlock(&kvm->irq_lock); } /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index fadf4408a820..15a83b93566d 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -82,8 +82,6 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, int i, r = -1; struct kvm_vcpu *vcpu, *lowest = NULL; - WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - if (irq->dest_mode == 0 && irq->dest_id == 0xff && kvm_is_dm_lowest_prio(irq)) printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n"); @@ -138,7 +136,7 @@ static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return kvm_irq_delivery_to_apic(kvm, NULL, &irq); } -/* This should be called with the kvm->irq_lock mutex held +/* * Return value: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) @@ -153,8 +151,6 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level) trace_kvm_set_irq(irq, level, irq_source_id); - WARN_ON(!mutex_is_locked(&kvm->irq_lock)); - /* Not possible to detect if the guest uses the PIC or the * IOAPIC. So set the bit in both. The guest will ignore * writes to the unused one. diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6eca153e1a02..c12c95b1b641 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -137,7 +137,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) interrupt_work); kvm = assigned_dev->kvm; - mutex_lock(&kvm->irq_lock); spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { struct kvm_guest_msix_entry *guest_entries = @@ -156,7 +155,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) assigned_dev->guest_irq, 1); spin_unlock_irq(&assigned_dev->assigned_dev_lock); - mutex_unlock(&assigned_dev->kvm->irq_lock); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -- cgit v1.2.3 From bfd99ff5d483b11c32bca49fbff7a5ac59038b0a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 26 Aug 2009 14:57:50 +0300 Subject: KVM: Move assigned device code to own file Signed-off-by: Avi Kivity --- virt/kvm/assigned-dev.c | 818 ++++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/kvm_main.c | 798 +--------------------------------------------- 2 files changed, 820 insertions(+), 796 deletions(-) create mode 100644 virt/kvm/assigned-dev.c (limited to 'virt/kvm') diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c new file mode 100644 index 000000000000..fd9c097b760a --- /dev/null +++ b/virt/kvm/assigned-dev.c @@ -0,0 +1,818 @@ +/* + * Kernel-based Virtual Machine - device assignment support + * + * Copyright (C) 2006-9 Red Hat, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "irq.h" + +static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, + int assigned_dev_id) +{ + struct list_head *ptr; + struct kvm_assigned_dev_kernel *match; + + list_for_each(ptr, head) { + match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); + if (match->assigned_dev_id == assigned_dev_id) + return match; + } + return NULL; +} + +static int find_index_from_host_irq(struct kvm_assigned_dev_kernel + *assigned_dev, int irq) +{ + int i, index; + struct msix_entry *host_msix_entries; + + host_msix_entries = assigned_dev->host_msix_entries; + + index = -1; + for (i = 0; i < assigned_dev->entries_nr; i++) + if (irq == host_msix_entries[i].vector) { + index = i; + break; + } + if (index < 0) { + printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); + return 0; + } + + return index; +} + +static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) +{ + struct kvm_assigned_dev_kernel *assigned_dev; + struct kvm *kvm; + int i; + + assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, + interrupt_work); + kvm = assigned_dev->kvm; + + spin_lock_irq(&assigned_dev->assigned_dev_lock); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + struct kvm_guest_msix_entry *guest_entries = + assigned_dev->guest_msix_entries; + for (i = 0; i < assigned_dev->entries_nr; i++) { + if (!(guest_entries[i].flags & + KVM_ASSIGNED_MSIX_PENDING)) + continue; + guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, + guest_entries[i].vector, 1); + } + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + spin_unlock_irq(&assigned_dev->assigned_dev_lock); +} + +static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) +{ + unsigned long flags; + struct kvm_assigned_dev_kernel *assigned_dev = + (struct kvm_assigned_dev_kernel *) dev_id; + + spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int index = find_index_from_host_irq(assigned_dev, irq); + if (index < 0) + goto out; + assigned_dev->guest_msix_entries[index].flags |= + KVM_ASSIGNED_MSIX_PENDING; + } + + schedule_work(&assigned_dev->interrupt_work); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + } + +out: + spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); + return IRQ_HANDLED; +} + +/* Ack the irq line for an assigned device */ +static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) +{ + struct kvm_assigned_dev_kernel *dev; + unsigned long flags; + + if (kian->gsi == -1) + return; + + dev = container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); + + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); + + /* The guest irq may be shared so this ack may be + * from another device. + */ + spin_lock_irqsave(&dev->assigned_dev_lock, flags); + if (dev->host_irq_disabled) { + enable_irq(dev->host_irq); + dev->host_irq_disabled = false; + } + spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); +} + +static void deassign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); + assigned_dev->ack_notifier.gsi = -1; + + if (assigned_dev->irq_source_id != -1) + kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); + assigned_dev->irq_source_id = -1; + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); +} + +/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ +static void deassign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + /* + * In kvm_free_device_irq, cancel_work_sync return true if: + * 1. work is scheduled, and then cancelled. + * 2. work callback is executed. + * + * The first one ensured that the irq is disabled and no more events + * would happen. But for the second one, the irq may be enabled (e.g. + * for MSI). So we disable irq here to prevent further events. + * + * Notice this maybe result in nested disable if the interrupt type is + * INTx, but it's OK for we are going to free it. + * + * If this function is a part of VM destroy, please ensure that till + * now, the kvm state is still legal for probably we also have to wait + * interrupt_work done. + */ + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { + int i; + for (i = 0; i < assigned_dev->entries_nr; i++) + disable_irq_nosync(assigned_dev-> + host_msix_entries[i].vector); + + cancel_work_sync(&assigned_dev->interrupt_work); + + for (i = 0; i < assigned_dev->entries_nr; i++) + free_irq(assigned_dev->host_msix_entries[i].vector, + (void *)assigned_dev); + + assigned_dev->entries_nr = 0; + kfree(assigned_dev->host_msix_entries); + kfree(assigned_dev->guest_msix_entries); + pci_disable_msix(assigned_dev->dev); + } else { + /* Deal with MSI and INTx */ + disable_irq_nosync(assigned_dev->host_irq); + cancel_work_sync(&assigned_dev->interrupt_work); + + free_irq(assigned_dev->host_irq, (void *)assigned_dev); + + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) + pci_disable_msi(assigned_dev->dev); + } + + assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); +} + +static int kvm_deassign_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev, + unsigned long irq_requested_type) +{ + unsigned long guest_irq_type, host_irq_type; + + if (!irqchip_in_kernel(kvm)) + return -EINVAL; + /* no irq assignment to deassign */ + if (!assigned_dev->irq_requested_type) + return -ENXIO; + + host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; + guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; + + if (host_irq_type) + deassign_host_irq(kvm, assigned_dev); + if (guest_irq_type) + deassign_guest_irq(kvm, assigned_dev); + + return 0; +} + +static void kvm_free_assigned_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); +} + +static void kvm_free_assigned_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel + *assigned_dev) +{ + kvm_free_assigned_irq(kvm, assigned_dev); + + pci_reset_function(assigned_dev->dev); + + pci_release_regions(assigned_dev->dev); + pci_disable_device(assigned_dev->dev); + pci_dev_put(assigned_dev->dev); + + list_del(&assigned_dev->list); + kfree(assigned_dev); +} + +void kvm_free_all_assigned_devices(struct kvm *kvm) +{ + struct list_head *ptr, *ptr2; + struct kvm_assigned_dev_kernel *assigned_dev; + + list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { + assigned_dev = list_entry(ptr, + struct kvm_assigned_dev_kernel, + list); + + kvm_free_assigned_device(kvm, assigned_dev); + } +} + +static int assigned_device_enable_host_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + dev->host_irq = dev->dev->irq; + /* Even though this is PCI, we don't want to use shared + * interrupts. Sharing host devices with guest-assigned devices + * on the same interrupt line is not a happy situation: there + * are going to be long delays in accepting, acking, etc. + */ + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, + 0, "kvm_assigned_intx_device", (void *)dev)) + return -EIO; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_host_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int r; + + if (!dev->dev->msi_enabled) { + r = pci_enable_msi(dev->dev); + if (r) + return r; + } + + dev->host_irq = dev->dev->irq; + if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, + "kvm_assigned_msi_device", (void *)dev)) { + pci_disable_msi(dev->dev); + return -EIO; + } + + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_host_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev) +{ + int i, r = -EINVAL; + + /* host_msix_entries and guest_msix_entries should have been + * initialized */ + if (dev->entries_nr == 0) + return r; + + r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); + if (r) + return r; + + for (i = 0; i < dev->entries_nr; i++) { + r = request_irq(dev->host_msix_entries[i].vector, + kvm_assigned_dev_intr, 0, + "kvm_assigned_msix_device", + (void *)dev); + /* FIXME: free requested_irq's on failure */ + if (r) + return r; + } + + return 0; +} + +#endif + +static int assigned_device_enable_guest_intx(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = irq->guest_irq; + return 0; +} + +#ifdef __KVM_HAVE_MSI +static int assigned_device_enable_guest_msi(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +#ifdef __KVM_HAVE_MSIX +static int assigned_device_enable_guest_msix(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq) +{ + dev->guest_irq = irq->guest_irq; + dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; + return 0; +} +#endif + +static int assign_host_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + __u32 host_irq_type) +{ + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) + return r; + + switch (host_irq_type) { + case KVM_DEV_IRQ_HOST_INTX: + r = assigned_device_enable_host_intx(kvm, dev); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_HOST_MSI: + r = assigned_device_enable_host_msi(kvm, dev); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_HOST_MSIX: + r = assigned_device_enable_host_msix(kvm, dev); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) + dev->irq_requested_type |= host_irq_type; + + return r; +} + +static int assign_guest_irq(struct kvm *kvm, + struct kvm_assigned_dev_kernel *dev, + struct kvm_assigned_irq *irq, + unsigned long guest_irq_type) +{ + int id; + int r = -EEXIST; + + if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) + return r; + + id = kvm_request_irq_source_id(kvm); + if (id < 0) + return id; + + dev->irq_source_id = id; + + switch (guest_irq_type) { + case KVM_DEV_IRQ_GUEST_INTX: + r = assigned_device_enable_guest_intx(kvm, dev, irq); + break; +#ifdef __KVM_HAVE_MSI + case KVM_DEV_IRQ_GUEST_MSI: + r = assigned_device_enable_guest_msi(kvm, dev, irq); + break; +#endif +#ifdef __KVM_HAVE_MSIX + case KVM_DEV_IRQ_GUEST_MSIX: + r = assigned_device_enable_guest_msix(kvm, dev, irq); + break; +#endif + default: + r = -EINVAL; + } + + if (!r) { + dev->irq_requested_type |= guest_irq_type; + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + } else + kvm_free_irq_source_id(kvm, dev->irq_source_id); + + return r; +} + +/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ +static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, + struct kvm_assigned_irq *assigned_irq) +{ + int r = -EINVAL; + struct kvm_assigned_dev_kernel *match; + unsigned long host_irq_type, guest_irq_type; + + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + + if (!irqchip_in_kernel(kvm)) + return r; + + mutex_lock(&kvm->lock); + r = -ENODEV; + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); + guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK); + + r = -EINVAL; + /* can only assign one type at a time */ + if (hweight_long(host_irq_type) > 1) + goto out; + if (hweight_long(guest_irq_type) > 1) + goto out; + if (host_irq_type == 0 && guest_irq_type == 0) + goto out; + + r = 0; + if (host_irq_type) + r = assign_host_irq(kvm, match, host_irq_type); + if (r) + goto out; + + if (guest_irq_type) + r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, + struct kvm_assigned_irq + *assigned_irq) +{ + int r = -ENODEV; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_irq->assigned_dev_id); + if (!match) + goto out; + + r = kvm_deassign_irq(kvm, match, assigned_irq->flags); +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_assign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + struct pci_dev *dev; + + down_read(&kvm->slots_lock); + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (match) { + /* device already assigned */ + r = -EEXIST; + goto out; + } + + match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL); + if (match == NULL) { + printk(KERN_INFO "%s: Couldn't allocate memory\n", + __func__); + r = -ENOMEM; + goto out; + } + dev = pci_get_bus_and_slot(assigned_dev->busnr, + assigned_dev->devfn); + if (!dev) { + printk(KERN_INFO "%s: host device not found\n", __func__); + r = -EINVAL; + goto out_free; + } + if (pci_enable_device(dev)) { + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); + r = -EBUSY; + goto out_put; + } + r = pci_request_regions(dev, "kvm_assigned_device"); + if (r) { + printk(KERN_INFO "%s: Could not get access to device regions\n", + __func__); + goto out_disable; + } + + pci_reset_function(dev); + + match->assigned_dev_id = assigned_dev->assigned_dev_id; + match->host_busnr = assigned_dev->busnr; + match->host_devfn = assigned_dev->devfn; + match->flags = assigned_dev->flags; + match->dev = dev; + spin_lock_init(&match->assigned_dev_lock); + match->irq_source_id = -1; + match->kvm = kvm; + match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; + INIT_WORK(&match->interrupt_work, + kvm_assigned_dev_interrupt_work_handler); + + list_add(&match->list, &kvm->arch.assigned_dev_head); + + if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; + } + +out: + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +out_list_del: + list_del(&match->list); + pci_release_regions(dev); +out_disable: + pci_disable_device(dev); +out_put: + pci_dev_put(dev); +out_free: + kfree(match); + mutex_unlock(&kvm->lock); + up_read(&kvm->slots_lock); + return r; +} + +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + printk(KERN_INFO "%s: device hasn't been assigned before, " + "so cannot be deassigned\n", __func__); + r = -EINVAL; + goto out; + } + + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + kvm_deassign_device(kvm, match); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(&kvm->lock); + return r; +} + + +#ifdef __KVM_HAVE_MSIX +static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm, + struct kvm_assigned_msix_nr *entry_nr) +{ + int r = 0; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry_nr->assigned_dev_id); + if (!adev) { + r = -EINVAL; + goto msix_nr_out; + } + + if (adev->entries_nr == 0) { + adev->entries_nr = entry_nr->entry_nr; + if (adev->entries_nr == 0 || + adev->entries_nr >= KVM_MAX_MSIX_PER_DEV) { + r = -EINVAL; + goto msix_nr_out; + } + + adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) * + entry_nr->entry_nr, + GFP_KERNEL); + if (!adev->host_msix_entries) { + r = -ENOMEM; + goto msix_nr_out; + } + adev->guest_msix_entries = kzalloc( + sizeof(struct kvm_guest_msix_entry) * + entry_nr->entry_nr, GFP_KERNEL); + if (!adev->guest_msix_entries) { + kfree(adev->host_msix_entries); + r = -ENOMEM; + goto msix_nr_out; + } + } else /* Not allowed set MSI-X number twice */ + r = -EINVAL; +msix_nr_out: + mutex_unlock(&kvm->lock); + return r; +} + +static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm, + struct kvm_assigned_msix_entry *entry) +{ + int r = 0, i; + struct kvm_assigned_dev_kernel *adev; + + mutex_lock(&kvm->lock); + + adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + entry->assigned_dev_id); + + if (!adev) { + r = -EINVAL; + goto msix_entry_out; + } + + for (i = 0; i < adev->entries_nr; i++) + if (adev->guest_msix_entries[i].vector == 0 || + adev->guest_msix_entries[i].entry == entry->entry) { + adev->guest_msix_entries[i].entry = entry->entry; + adev->guest_msix_entries[i].vector = entry->gsi; + adev->host_msix_entries[i].entry = entry->entry; + break; + } + if (i == adev->entries_nr) { + r = -ENOSPC; + goto msix_entry_out; + } + +msix_entry_out: + mutex_unlock(&kvm->lock); + + return r; +} +#endif + +long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int r = -ENOTTY; + + switch (ioctl) { + case KVM_ASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } + case KVM_ASSIGN_IRQ: { + r = -EOPNOTSUPP; + break; + } +#ifdef KVM_CAP_ASSIGN_DEV_IRQ + case KVM_ASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } + case KVM_DEASSIGN_DEV_IRQ: { + struct kvm_assigned_irq assigned_irq; + + r = -EFAULT; + if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq)) + goto out; + r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev); + if (r) + goto out; + break; + } +#endif +#ifdef KVM_CAP_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(&routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr >= KVM_MAX_IRQ_ROUTES) + goto out; + if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting->entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* KVM_CAP_IRQ_ROUTING */ +#ifdef __KVM_HAVE_MSIX + case KVM_ASSIGN_SET_MSIX_NR: { + struct kvm_assigned_msix_nr entry_nr; + r = -EFAULT; + if (copy_from_user(&entry_nr, argp, sizeof entry_nr)) + goto out; + r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr); + if (r) + goto out; + break; + } + case KVM_ASSIGN_SET_MSIX_ENTRY: { + struct kvm_assigned_msix_entry entry; + r = -EFAULT; + if (copy_from_user(&entry, argp, sizeof entry)) + goto out; + r = kvm_vm_ioctl_set_msix_entry(kvm, &entry); + if (r) + goto out; + break; + } +#endif + } +out: + return r; +} + diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index c12c95b1b641..38e4d2c34ac1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -53,12 +53,6 @@ #include "coalesced_mmio.h" #endif -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -#include -#include -#include "irq.h" -#endif - #define CREATE_TRACE_POINTS #include @@ -90,608 +84,6 @@ static bool kvm_rebooting; static bool largepages_enabled = true; -#ifdef KVM_CAP_DEVICE_ASSIGNMENT -static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, - int assigned_dev_id) -{ - struct list_head *ptr; - struct kvm_assigned_dev_kernel *match; - - list_for_each(ptr, head) { - match = list_entry(ptr, struct kvm_assigned_dev_kernel, list); - if (match->assigned_dev_id == assigned_dev_id) - return match; - } - return NULL; -} - -static int find_index_from_host_irq(struct kvm_assigned_dev_kernel - *assigned_dev, int irq) -{ - int i, index; - struct msix_entry *host_msix_entries; - - host_msix_entries = assigned_dev->host_msix_entries; - - index = -1; - for (i = 0; i < assigned_dev->entries_nr; i++) - if (irq == host_msix_entries[i].vector) { - index = i; - break; - } - if (index < 0) { - printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n"); - return 0; - } - - return index; -} - -static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) -{ - struct kvm_assigned_dev_kernel *assigned_dev; - struct kvm *kvm; - int i; - - assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, - interrupt_work); - kvm = assigned_dev->kvm; - - spin_lock_irq(&assigned_dev->assigned_dev_lock); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - struct kvm_guest_msix_entry *guest_entries = - assigned_dev->guest_msix_entries; - for (i = 0; i < assigned_dev->entries_nr; i++) { - if (!(guest_entries[i].flags & - KVM_ASSIGNED_MSIX_PENDING)) - continue; - guest_entries[i].flags &= ~KVM_ASSIGNED_MSIX_PENDING; - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, - guest_entries[i].vector, 1); - } - } else - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); - - spin_unlock_irq(&assigned_dev->assigned_dev_lock); -} - -static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) -{ - unsigned long flags; - struct kvm_assigned_dev_kernel *assigned_dev = - (struct kvm_assigned_dev_kernel *) dev_id; - - spin_lock_irqsave(&assigned_dev->assigned_dev_lock, flags); - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int index = find_index_from_host_irq(assigned_dev, irq); - if (index < 0) - goto out; - assigned_dev->guest_msix_entries[index].flags |= - KVM_ASSIGNED_MSIX_PENDING; - } - - schedule_work(&assigned_dev->interrupt_work); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; - } - -out: - spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); - return IRQ_HANDLED; -} - -/* Ack the irq line for an assigned device */ -static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) -{ - struct kvm_assigned_dev_kernel *dev; - unsigned long flags; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); - - kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - - /* The guest irq may be shared so this ack may be - * from another device. - */ - spin_lock_irqsave(&dev->assigned_dev_lock, flags); - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; - } - spin_unlock_irqrestore(&dev->assigned_dev_lock, flags); -} - -static void deassign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - assigned_dev->ack_notifier.gsi = -1; - - if (assigned_dev->irq_source_id != -1) - kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id); - assigned_dev->irq_source_id = -1; - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK); -} - -/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */ -static void deassign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - /* - * In kvm_free_device_irq, cancel_work_sync return true if: - * 1. work is scheduled, and then cancelled. - * 2. work callback is executed. - * - * The first one ensured that the irq is disabled and no more events - * would happen. But for the second one, the irq may be enabled (e.g. - * for MSI). So we disable irq here to prevent further events. - * - * Notice this maybe result in nested disable if the interrupt type is - * INTx, but it's OK for we are going to free it. - * - * If this function is a part of VM destroy, please ensure that till - * now, the kvm state is still legal for probably we also have to wait - * interrupt_work done. - */ - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - int i; - for (i = 0; i < assigned_dev->entries_nr; i++) - disable_irq_nosync(assigned_dev-> - host_msix_entries[i].vector); - - cancel_work_sync(&assigned_dev->interrupt_work); - - for (i = 0; i < assigned_dev->entries_nr; i++) - free_irq(assigned_dev->host_msix_entries[i].vector, - (void *)assigned_dev); - - assigned_dev->entries_nr = 0; - kfree(assigned_dev->host_msix_entries); - kfree(assigned_dev->guest_msix_entries); - pci_disable_msix(assigned_dev->dev); - } else { - /* Deal with MSI and INTx */ - disable_irq_nosync(assigned_dev->host_irq); - cancel_work_sync(&assigned_dev->interrupt_work); - - free_irq(assigned_dev->host_irq, (void *)assigned_dev); - - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) - pci_disable_msi(assigned_dev->dev); - } - - assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK); -} - -static int kvm_deassign_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev, - unsigned long irq_requested_type) -{ - unsigned long guest_irq_type, host_irq_type; - - if (!irqchip_in_kernel(kvm)) - return -EINVAL; - /* no irq assignment to deassign */ - if (!assigned_dev->irq_requested_type) - return -ENXIO; - - host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK; - guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK; - - if (host_irq_type) - deassign_host_irq(kvm, assigned_dev); - if (guest_irq_type) - deassign_guest_irq(kvm, assigned_dev); - - return 0; -} - -static void kvm_free_assigned_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) -{ - kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type); -} - -static void kvm_free_assigned_device(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) -{ - kvm_free_assigned_irq(kvm, assigned_dev); - - pci_reset_function(assigned_dev->dev); - - pci_release_regions(assigned_dev->dev); - pci_disable_device(assigned_dev->dev); - pci_dev_put(assigned_dev->dev); - - list_del(&assigned_dev->list); - kfree(assigned_dev); -} - -void kvm_free_all_assigned_devices(struct kvm *kvm) -{ - struct list_head *ptr, *ptr2; - struct kvm_assigned_dev_kernel *assigned_dev; - - list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) { - assigned_dev = list_entry(ptr, - struct kvm_assigned_dev_kernel, - list); - - kvm_free_assigned_device(kvm, assigned_dev); - } -} - -static int assigned_device_enable_host_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. - */ - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, - 0, "kvm_assigned_intx_device", (void *)dev)) - return -EIO; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_host_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int r; - - if (!dev->dev->msi_enabled) { - r = pci_enable_msi(dev->dev); - if (r) - return r; - } - - dev->host_irq = dev->dev->irq; - if (request_irq(dev->host_irq, kvm_assigned_dev_intr, 0, - "kvm_assigned_msi_device", (void *)dev)) { - pci_disable_msi(dev->dev); - return -EIO; - } - - return 0; -} -#endif - -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_host_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev) -{ - int i, r = -EINVAL; - - /* host_msix_entries and guest_msix_entries should have been - * initialized */ - if (dev->entries_nr == 0) - return r; - - r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr); - if (r) - return r; - - for (i = 0; i < dev->entries_nr; i++) { - r = request_irq(dev->host_msix_entries[i].vector, - kvm_assigned_dev_intr, 0, - "kvm_assigned_msix_device", - (void *)dev); - /* FIXME: free requested_irq's on failure */ - if (r) - return r; - } - - return 0; -} - -#endif - -static int assigned_device_enable_guest_intx(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = irq->guest_irq; - return 0; -} - -#ifdef __KVM_HAVE_MSI -static int assigned_device_enable_guest_msi(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif -#ifdef __KVM_HAVE_MSIX -static int assigned_device_enable_guest_msix(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq) -{ - dev->guest_irq = irq->guest_irq; - dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; - return 0; -} -#endif - -static int assign_host_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - __u32 host_irq_type) -{ - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK) - return r; - - switch (host_irq_type) { - case KVM_DEV_IRQ_HOST_INTX: - r = assigned_device_enable_host_intx(kvm, dev); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_HOST_MSI: - r = assigned_device_enable_host_msi(kvm, dev); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_HOST_MSIX: - r = assigned_device_enable_host_msix(kvm, dev); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) - dev->irq_requested_type |= host_irq_type; - - return r; -} - -static int assign_guest_irq(struct kvm *kvm, - struct kvm_assigned_dev_kernel *dev, - struct kvm_assigned_irq *irq, - unsigned long guest_irq_type) -{ - int id; - int r = -EEXIST; - - if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK) - return r; - - id = kvm_request_irq_source_id(kvm); - if (id < 0) - return id; - - dev->irq_source_id = id; - - switch (guest_irq_type) { - case KVM_DEV_IRQ_GUEST_INTX: - r = assigned_device_enable_guest_intx(kvm, dev, irq); - break; -#ifdef __KVM_HAVE_MSI - case KVM_DEV_IRQ_GUEST_MSI: - r = assigned_device_enable_guest_msi(kvm, dev, irq); - break; -#endif -#ifdef __KVM_HAVE_MSIX - case KVM_DEV_IRQ_GUEST_MSIX: - r = assigned_device_enable_guest_msix(kvm, dev, irq); - break; -#endif - default: - r = -EINVAL; - } - - if (!r) { - dev->irq_requested_type |= guest_irq_type; - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); - } else - kvm_free_irq_source_id(kvm, dev->irq_source_id); - - return r; -} - -/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */ -static int kvm_vm_ioctl_assign_irq(struct kvm *kvm, - struct kvm_assigned_irq *assigned_irq) -{ - int r = -EINVAL; - struct kvm_assigned_dev_kernel *match; - unsigned long host_irq_type, guest_irq_type; - - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - - if (!irqchip_in_kernel(kvm)) - return r; - - mutex_lock(&kvm->lock); - r = -ENODEV; - match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, - assigned_irq->assigned_dev_id); - if (!match) - goto out; - - host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK); - guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_G