diff options
Diffstat (limited to 'drivers/xen')
-rw-r--r-- | drivers/xen/Kconfig | 10 | ||||
-rw-r--r-- | drivers/xen/Makefile | 6 | ||||
-rw-r--r-- | drivers/xen/balloon.c | 375 | ||||
-rw-r--r-- | drivers/xen/events.c | 399 | ||||
-rw-r--r-- | drivers/xen/gntalloc.c | 545 | ||||
-rw-r--r-- | drivers/xen/gntdev.c | 382 | ||||
-rw-r--r-- | drivers/xen/grant-table.c | 10 | ||||
-rw-r--r-- | drivers/xen/manage.c | 153 | ||||
-rw-r--r-- | drivers/xen/platform-pci.c | 3 | ||||
-rw-r--r-- | drivers/xen/xen-balloon.c | 256 |
10 files changed, 1508 insertions, 631 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 07bec09d1dad..a59638b37c1a 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -76,10 +76,20 @@ config XEN_XENBUS_FRONTEND config XEN_GNTDEV tristate "userspace grant access device driver" depends on XEN + default m select MMU_NOTIFIER help Allows userspace processes to use grants. +config XEN_GRANT_DEV_ALLOC + tristate "User-space grant reference allocator driver" + depends on XEN + default m + help + Allows userspace processes to create pages with access granted + to other domains. This can be used to implement frontend drivers + or as part of an inter-domain shared memory channel. + config XEN_PLATFORM_PCI tristate "xen platform pci device driver" depends on XEN_PVHVM && PCI diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 5088cc2e6fe2..f420f1ff7f13 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -1,4 +1,4 @@ -obj-y += grant-table.o features.o events.o manage.o +obj-y += grant-table.o features.o events.o manage.o balloon.o obj-y += xenbus/ nostackp := $(call cc-option, -fno-stack-protector) @@ -7,9 +7,10 @@ CFLAGS_features.o := $(nostackp) obj-$(CONFIG_BLOCK) += biomerge.o obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o obj-$(CONFIG_XEN_XENCOMM) += xencomm.o -obj-$(CONFIG_XEN_BALLOON) += balloon.o +obj-$(CONFIG_XEN_BALLOON) += xen-balloon.o obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o @@ -18,5 +19,6 @@ obj-$(CONFIG_XEN_DOM0) += pci.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o +xen-gntalloc-y := gntalloc.o xen-platform-pci-y := platform-pci.o diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 43f9f02c7db0..043af8ad6b60 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -1,6 +1,4 @@ /****************************************************************************** - * balloon.c - * * Xen balloon driver - enables returning/claiming memory to/from Xen. * * Copyright (c) 2003, B Dragovic @@ -33,7 +31,6 @@ */ #include <linux/kernel.h> -#include <linux/module.h> #include <linux/sched.h> #include <linux/errno.h> #include <linux/mm.h> @@ -42,13 +39,11 @@ #include <linux/highmem.h> #include <linux/mutex.h> #include <linux/list.h> -#include <linux/sysdev.h> #include <linux/gfp.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> -#include <asm/uaccess.h> #include <asm/tlb.h> #include <asm/e820.h> @@ -58,35 +53,29 @@ #include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/memory.h> -#include <xen/xenbus.h> +#include <xen/balloon.h> #include <xen/features.h> #include <xen/page.h> -#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) - -#define BALLOON_CLASS_NAME "xen_memory" +/* + * balloon_process() state: + * + * BP_DONE: done or nothing to do, + * BP_EAGAIN: error, go to sleep, + * BP_ECANCELED: error, balloon operation canceled. + */ -struct balloon_stats { - /* We aim for 'current allocation' == 'target allocation'. */ - unsigned long current_pages; - unsigned long target_pages; - /* - * Drivers may alter the memory reservation independently, but they - * must inform the balloon driver so we avoid hitting the hard limit. - */ - unsigned long driver_pages; - /* Number of pages in high- and low-memory balloons. */ - unsigned long balloon_low; - unsigned long balloon_high; +enum bp_state { + BP_DONE, + BP_EAGAIN, + BP_ECANCELED }; -static DEFINE_MUTEX(balloon_mutex); - -static struct sys_device balloon_sysdev; -static int register_balloon(struct sys_device *sysdev); +static DEFINE_MUTEX(balloon_mutex); -static struct balloon_stats balloon_stats; +struct balloon_stats balloon_stats; +EXPORT_SYMBOL_GPL(balloon_stats); /* We increase/decrease in batches which fit in a page */ static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; @@ -104,8 +93,7 @@ static LIST_HEAD(ballooned_pages); /* Main work function, always executed in process context. */ static void balloon_process(struct work_struct *work); -static DECLARE_WORK(balloon_worker, balloon_process); -static struct timer_list balloon_timer; +static DECLARE_DELAYED_WORK(balloon_worker, balloon_process); /* When ballooning out (allocating memory to return to Xen) we don't really want the kernel to try too hard since that can trigger the oom killer. */ @@ -140,14 +128,17 @@ static void balloon_append(struct page *page) } /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */ -static struct page *balloon_retrieve(void) +static struct page *balloon_retrieve(bool prefer_highmem) { struct page *page; if (list_empty(&ballooned_pages)) return NULL; - page = list_entry(ballooned_pages.next, struct page, lru); + if (prefer_highmem) + page = list_entry(ballooned_pages.prev, struct page, lru); + else + page = list_entry(ballooned_pages.next, struct page, lru); list_del(&page->lru); if (PageHighMem(page)) { @@ -177,9 +168,29 @@ static struct page *balloon_next_page(struct page *page) return list_entry(next, struct page, lru); } -static void balloon_alarm(unsigned long unused) +static enum bp_state update_schedule(enum bp_state state) { - schedule_work(&balloon_worker); + if (state == BP_DONE) { + balloon_stats.schedule_delay = 1; + balloon_stats.retry_count = 1; + return BP_DONE; + } + + ++balloon_stats.retry_count; + + if (balloon_stats.max_retry_count != RETRY_UNLIMITED && + balloon_stats.retry_count > balloon_stats.max_retry_count) { + balloon_stats.schedule_delay = 1; + balloon_stats.retry_count = 1; + return BP_ECANCELED; + } + + balloon_stats.schedule_delay <<= 1; + + if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) + balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; + + return BP_EAGAIN; } static unsigned long current_target(void) @@ -194,11 +205,11 @@ static unsigned long current_target(void) return target; } -static int increase_reservation(unsigned long nr_pages) +static enum bp_state increase_reservation(unsigned long nr_pages) { + int rc; unsigned long pfn, i; struct page *page; - long rc; struct xen_memory_reservation reservation = { .address_bits = 0, .extent_order = 0, @@ -210,7 +221,10 @@ static int increase_reservation(unsigned long nr_pages) page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { - BUG_ON(page == NULL); + if (!page) { + nr_pages = i; + break; + } frame_list[i] = page_to_pfn(page); page = balloon_next_page(page); } @@ -218,11 +232,11 @@ static int increase_reservation(unsigned long nr_pages) set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); - if (rc < 0) - goto out; + if (rc <= 0) + return BP_EAGAIN; for (i = 0; i < rc; i++) { - page = balloon_retrieve(); + page = balloon_retrieve(false); BUG_ON(page == NULL); pfn = page_to_pfn(page); @@ -232,7 +246,7 @@ static int increase_reservation(unsigned long nr_pages) set_phys_to_machine(pfn, frame_list[i]); /* Link back into the page tables if not highmem. */ - if (pfn < max_low_pfn) { + if (!xen_hvm_domain() && pfn < max_low_pfn) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -249,15 +263,14 @@ static int increase_reservation(unsigned long nr_pages) balloon_stats.current_pages += rc; - out: - return rc < 0 ? rc : rc != nr_pages; + return BP_DONE; } -static int decrease_reservation(unsigned long nr_pages) +static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) { + enum bp_state state = BP_DONE; unsigned long pfn, i; struct page *page; - int need_sleep = 0; int ret; struct xen_memory_reservation reservation = { .address_bits = 0, @@ -269,9 +282,9 @@ static int decrease_reservation(unsigned long nr_pages) nr_pages = ARRAY_SIZE(frame_list); for (i = 0; i < nr_pages; i++) { - if ((page = alloc_page(GFP_BALLOON)) == NULL) { + if ((page = alloc_page(gfp)) == NULL) { nr_pages = i; - need_sleep = 1; + state = BP_EAGAIN; break; } @@ -280,7 +293,7 @@ static int decrease_reservation(unsigned long nr_pages) scrub_page(page); - if (!PageHighMem(page)) { + if (!xen_hvm_domain() && !PageHighMem(page)) { ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), __pte_ma(0), 0); @@ -296,7 +309,7 @@ static int decrease_reservation(unsigned long nr_pages) /* No more mappings: invalidate P2M and add to balloon. */ for (i = 0; i < nr_pages; i++) { pfn = mfn_to_pfn(frame_list[i]); - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); balloon_append(pfn_to_page(pfn)); } @@ -307,7 +320,7 @@ static int decrease_reservation(unsigned long nr_pages) balloon_stats.current_pages -= nr_pages; - return need_sleep; + return state; } /* @@ -318,99 +331,125 @@ static int decrease_reservation(unsigned long nr_pages) */ static void balloon_process(struct work_struct *work) { - int need_sleep = 0; + enum bp_state state = BP_DONE; long credit; mutex_lock(&balloon_mutex); do { credit = current_target() - balloon_stats.current_pages; + if (credit > 0) - need_sleep = (increase_reservation(credit) != 0); + state = increase_reservation(credit); + if (credit < 0) - need_sleep = (decrease_reservation(-credit) != 0); + state = decrease_reservation(-credit, GFP_BALLOON); + + state = update_schedule(state); #ifndef CONFIG_PREEMPT if (need_resched()) schedule(); #endif - } while ((credit != 0) && !need_sleep); + } while (credit && state == BP_DONE); /* Schedule more work if there is some still to be done. */ - if (current_target() != balloon_stats.current_pages) - mod_timer(&balloon_timer, jiffies + HZ); + if (state == BP_EAGAIN) + schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ); mutex_unlock(&balloon_mutex); } /* Resets the Xen limit, sets new target, and kicks off processing. */ -static void balloon_set_new_target(unsigned long target) +void balloon_set_new_target(unsigned long target) { /* No need for lock. Not read-modify-write updates. */ balloon_stats.target_pages = target; - schedule_work(&balloon_worker); + schedule_delayed_work(&balloon_worker, 0); } +EXPORT_SYMBOL_GPL(balloon_set_new_target); -static struct xenbus_watch target_watch = -{ - .node = "memory/target" -}; - -/* React to a change in the target key */ -static void watch_target(struct xenbus_watch *watch, - const char **vec, unsigned int len) +/** + * alloc_xenballooned_pages - get pages that have been ballooned out + * @nr_pages: Number of pages to get + * @pages: pages returned + * @return 0 on success, error otherwise + */ +int alloc_xenballooned_pages(int nr_pages, struct page** pages) { - unsigned long long new_target; - int err; - - err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target); - if (err != 1) { - /* This is ok (for domain0 at least) - so just return */ - return; + int pgno = 0; + struct page* page; + mutex_lock(&balloon_mutex); + while (pgno < nr_pages) { + page = balloon_retrieve(true); + if (page) { + pages[pgno++] = page; + } else { + enum bp_state st; + st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER); + if (st != BP_DONE) + goto out_undo; + } } - - /* The given memory/target value is in KiB, so it needs converting to - * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. - */ - balloon_set_new_target(new_target >> (PAGE_SHIFT - 10)); + mutex_unlock(&balloon_mutex); + return 0; + out_undo: + while (pgno) + balloon_append(pages[--pgno]); + /* Free the memory back to the kernel soon */ + schedule_delayed_work(&balloon_worker, 0); + mutex_unlock(&balloon_mutex); + return -ENOMEM; } +EXPORT_SYMBOL(alloc_xenballooned_pages); -static int balloon_init_watcher(struct notifier_block *notifier, - unsigned long event, - void *data) +/** + * free_xenballooned_pages - return pages retrieved with get_ballooned_pages + * @nr_pages: Number of pages + * @pages: pages to return + */ +void free_xenballooned_pages(int nr_pages, struct page** pages) { - int err; + int i; - err = register_xenbus_watch(&target_watch); - if (err) - printk(KERN_ERR "Failed to set balloon watcher\n"); + mutex_lock(&balloon_mutex); - return NOTIFY_DONE; -} + for (i = 0; i < nr_pages; i++) { + if (pages[i]) + balloon_append(pages[i]); + } + + /* The balloon may be too large now. Shrink it if needed. */ + if (current_target() != balloon_stats.current_pages) + schedule_delayed_work(&balloon_worker, 0); -static struct notifier_block xenstore_notifier; + mutex_unlock(&balloon_mutex); +} +EXPORT_SYMBOL(free_xenballooned_pages); static int __init balloon_init(void) { - unsigned long pfn, extra_pfn_end; + unsigned long pfn, nr_pages, extra_pfn_end; struct page *page; - if (!xen_pv_domain()) + if (!xen_domain()) return -ENODEV; - pr_info("xen_balloon: Initialising balloon driver.\n"); + pr_info("xen/balloon: Initialising balloon driver.\n"); - balloon_stats.current_pages = min(xen_start_info->nr_pages, max_pfn); + if (xen_pv_domain()) + nr_pages = xen_start_info->nr_pages; + else + nr_pages = max_pfn; + balloon_stats.current_pages = min(nr_pages, max_pfn); balloon_stats.target_pages = balloon_stats.current_pages; balloon_stats.balloon_low = 0; balloon_stats.balloon_high = 0; - balloon_stats.driver_pages = 0UL; - - init_timer(&balloon_timer); - balloon_timer.data = 0; - balloon_timer.function = balloon_alarm; - register_balloon(&balloon_sysdev); + balloon_stats.schedule_delay = 1; + balloon_stats.max_schedule_delay = 32; + balloon_stats.retry_count = 1; + balloon_stats.max_retry_count = RETRY_UNLIMITED; /* * Initialise the balloon with excess memory space. We need @@ -432,153 +471,9 @@ static int __init balloon_init(void) __balloon_append(page); } - target_watch.callback = watch_target; - xenstore_notifier.notifier_call = balloon_init_watcher; - - register_xenstore_notifier(&xenstore_notifier); - return 0; } subsys_initcall(balloon_init); -static void balloon_exit(void) -{ - /* XXX - release balloon here */ - return; -} - -module_exit(balloon_exit); - -#define BALLOON_SHOW(name, format, args...) \ - static ssize_t show_##name(struct sys_device *dev, \ - struct sysdev_attribute *attr, \ - char *buf) \ - { \ - return sprintf(buf, format, ##args); \ - } \ - static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL) - -BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages)); -BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low)); -BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high)); -BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages)); - -static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr, - char *buf) -{ - return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages)); -} - -static ssize_t store_target_kb(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - char *endchar; - unsigned long long target_bytes; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - target_bytes = simple_strtoull(buf, &endchar, 0) * 1024; - - balloon_set_new_target(target_bytes >> PAGE_SHIFT); - - return count; -} - -static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR, - show_target_kb, store_target_kb); - - -static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr, - char *buf) -{ - return sprintf(buf, "%llu\n", - (unsigned long long)balloon_stats.target_pages - << PAGE_SHIFT); -} - -static ssize_t store_target(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, - size_t count) -{ - char *endchar; - unsigned long long target_bytes; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - target_bytes = memparse(buf, &endchar); - - balloon_set_new_target(target_bytes >> PAGE_SHIFT); - - return count; -} - -static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR, - show_target, store_target); - - -static struct sysdev_attribute *balloon_attrs[] = { - &attr_target_kb, - &attr_target, -}; - -static struct attribute *balloon_info_attrs[] = { - &attr_current_kb.attr, - &attr_low_kb.attr, - &attr_high_kb.attr, - &attr_driver_kb.attr, - NULL -}; - -static struct attribute_group balloon_info_group = { - .name = "info", - .attrs = balloon_info_attrs, -}; - -static struct sysdev_class balloon_sysdev_class = { - .name = BALLOON_CLASS_NAME, -}; - -static int register_balloon(struct sys_device *sysdev) -{ - int i, error; - - error = sysdev_class_register(&balloon_sysdev_class); - if (error) - return error; - - sysdev->id = 0; - sysdev->cls = &balloon_sysdev_class; - - error = sysdev_register(sysdev); - if (error) { - sysdev_class_unregister(&balloon_sysdev_class); - return error; - } - - for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) { - error = sysdev_create_file(sysdev, balloon_attrs[i]); - if (error) - goto fail; - } - - error = sysfs_create_group(&sysdev->kobj, &balloon_info_group); - if (error) - goto fail; - - return 0; - - fail: - while (--i >= 0) - sysdev_remove_file(sysdev, balloon_attrs[i]); - sysdev_unregister(sysdev); - sysdev_class_unregister(&balloon_sysdev_class); - return error; -} - MODULE_LICENSE("GPL"); diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 8786e06c4180..02b5a9c05cfa 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -56,6 +56,8 @@ */ static DEFINE_SPINLOCK(irq_mapping_update_lock); +static LIST_HEAD(xen_irq_list_head); + /* IRQ <-> VIRQ mapping. */ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; @@ -85,7 +87,9 @@ enum xen_irq_type { */ struct irq_info { + struct list_head list; enum xen_irq_type type; /* type */ + unsigned irq; unsigned short evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ @@ -103,23 +107,10 @@ struct irq_info #define PIRQ_NEEDS_EOI (1 << 0) #define PIRQ_SHAREABLE (1 << 1) -static struct irq_info *irq_info; -static int *pirq_to_irq; - static int *evtchn_to_irq; -struct cpu_evtchn_s { - unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; -}; -static __initdata struct cpu_evtchn_s init_evtchn_mask = { - .bits[0 ... (NR_EVENT_CHANNELS/BITS_PER_LONG)-1] = ~0ul, -}; -static struct cpu_evtchn_s *cpu_evtchn_mask_p = &init_evtchn_mask; - -static inline unsigned long *cpu_evtchn_mask(int cpu) -{ - return cpu_evtchn_mask_p[cpu].bits; -} +static DEFINE_PER_CPU(unsigned long [NR_EVENT_CHANNELS/BITS_PER_LONG], + cpu_evtchn_mask); /* Xen will never allocate port zero for any purpose. */ #define VALID_EVTCHN(chn) ((chn) != 0) @@ -128,46 +119,86 @@ static struct irq_chip xen_dynamic_chip; static struct irq_chip xen_percpu_chip; static struct irq_chip xen_pirq_chip; -/* Constructor for packed IRQ information. */ -static struct irq_info mk_unbound_info(void) +/* Get info for IRQ */ +static struct irq_info *info_for_irq(unsigned irq) +{ + return get_irq_data(irq); +} + +/* Constructors for packed IRQ information. */ +static void xen_irq_info_common_init(struct irq_info *info, + unsigned irq, + enum xen_irq_type type, + unsigned short evtchn, + unsigned short cpu) { - return (struct irq_info) { .type = IRQT_UNBOUND }; + + BUG_ON(info->type != IRQT_UNBOUND && info->type != type); + + info->type = type; + info->irq = irq; + info->evtchn = evtchn; + info->cpu = cpu; + + evtchn_to_irq[evtchn] = irq; } -static struct irq_info mk_evtchn_info(unsigned short evtchn) +static void xen_irq_info_evtchn_init(unsigned irq, + unsigned short evtchn) { - return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, - .cpu = 0 }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0); } -static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) +static void xen_irq_info_ipi_init(unsigned cpu, + unsigned irq, + unsigned short evtchn, + enum ipi_vector ipi) { - return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, - .cpu = 0, .u.ipi = ipi }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0); + + info->u.ipi = ipi; + + per_cpu(ipi_to_irq, cpu)[ipi] = irq; } -static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) +static void xen_irq_info_virq_init(unsigned cpu, + unsigned irq, + unsigned short evtchn, + unsigned short virq) { - return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, - .cpu = 0, .u.virq = virq }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0); + + info->u.virq = virq; + + per_cpu(virq_to_irq, cpu)[virq] = irq; } -static struct irq_info mk_pirq_info(unsigned short evtchn, unsigned short pirq, - unsigned short gsi, unsigned short vector) +static void xen_irq_info_pirq_init(unsigned irq, + unsigned short evtchn, + unsigned short pirq, + unsigned short gsi, + unsigned short vector, + unsigned char flags) { - return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, - .cpu = 0, - .u.pirq = { .pirq = pirq, .gsi = gsi, .vector = vector } }; + struct irq_info *info = info_for_irq(irq); + + xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0); + + info->u.pirq.pirq = pirq; + info->u.pirq.gsi = gsi; + info->u.pirq.vector = vector; + info->u.pirq.flags = flags; } /* * Accessors for packed IRQ information. */ -static struct irq_info *info_for_irq(unsigned irq) -{ - return &irq_info[irq]; -} - static unsigned int evtchn_from_irq(unsigned irq) { if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq))) @@ -212,26 +243,6 @@ static unsigned pirq_from_irq(unsigned irq) return info->u.pirq.pirq; } -static unsigned gsi_from_irq(unsigned irq) -{ - struct irq_info *info = info_for_irq(irq); - - BUG_ON(info == NULL); - BUG_ON(info->type != IRQT_PIRQ); - - return info->u.pirq.gsi; -} - -static unsigned vector_from_irq(unsigned irq) -{ - struct irq_info *info = info_for_irq(irq); - - BUG_ON(info == NULL); - BUG_ON(info->type != IRQT_PIRQ); - - return info->u.pirq.vector; -} - static enum xen_irq_type type_from_irq(unsigned irq) { return info_for_irq(irq)->type; @@ -267,7 +278,7 @@ static inline unsigned long active_evtchns(unsigned int cpu, unsigned int idx) { return (sh->evtchn_pending[idx] & - cpu_evtchn_mask(cpu)[idx] & + per_cpu(cpu_evtchn_mask, cpu)[idx] & ~sh->evtchn_mask[idx]); } @@ -280,28 +291,28 @@ static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu)); #endif - clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); - set_bit(chn, cpu_evtchn_mask(cpu)); + clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))); + set_bit(chn, per_cpu(cpu_evtchn_mask, cpu)); - irq_info[irq].cpu = cpu; + info_for_irq(irq)->cpu = cpu; } static void init_evtchn_cpu_bindings(void) { int i; #ifdef CONFIG_SMP - struct irq_desc *desc; + struct irq_info *info; /* By default all event channels notify CPU#0. */ - for_each_irq_desc(i, desc) { + list_for_each_entry(info, &xen_irq_list_head, list) { + struct irq_desc *desc = irq_to_desc(info->irq); cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); } #endif for_each_possible_cpu(i) - memset(cpu_evtchn_mask(i), - (i == 0) ? ~0 : 0, sizeof(struct cpu_evtchn_s)); - + memset(per_cpu(cpu_evtchn_mask, i), + (i == 0) ? ~0 : 0, sizeof(*per_cpu(cpu_evtchn_mask, i))); } static inline void clear_evtchn(int port) @@ -376,7 +387,28 @@ static void unmask_evtchn(int port) put_cpu(); } -static int xen_allocate_irq_dynamic(void) +static void xen_irq_init(unsigned irq) +{ + struct irq_info *info; + struct irq_desc *desc = irq_to_desc(irq); + +#ifdef CONFIG_SMP + /* By default all event channels notify CPU#0. */ + cpumask_copy(desc->irq_data.affinity, cpumask_of(0)); +#endif + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + panic("Unable to allocate metadata for IRQ%d\n", irq); + + info->type = IRQT_UNBOUND; + + set_irq_data(irq, info); + + list_add_tail(&info->list, &xen_irq_list_head); +} + +static int __must_check xen_allocate_irq_dynamic(void) { int first = 0; int irq; @@ -393,22 +425,14 @@ static int xen_allocate_irq_dynamic(void) first = get_nr_irqs_gsi(); #endif -retry: irq = irq_alloc_desc_from(first, -1); - if (irq == -ENOMEM && first > NR_IRQS_LEGACY) { - printk(KERN_ERR "Out of dynamic IRQ space and eating into GSI space. You should increase nr_irqs\n"); - first = max(NR_IRQS_LEGACY, first - NR_IRQS_LEGACY); - goto retry; - } - - if (irq < 0) - panic("No available IRQ to bind to: increase nr_irqs!\n"); + xen_irq_init(irq); return irq; } -static int xen_allocate_irq_gsi(unsigned gsi) +static int __must_check xen_allocate_irq_gsi(unsigned gsi) { int irq; @@ -423,17 +447,25 @@ static int xen_allocate_irq_gsi(unsigned gsi) /* Legacy IRQ descriptors are already allocated by the arch. */ if (gsi < NR_IRQS_LEGACY) - return gsi; + irq = gsi; + else + irq = irq_alloc_desc_at(gsi, -1); - irq = irq_alloc_desc_at(gsi, -1); - if (irq < 0) - panic("Unable to allocate to IRQ%d (%d)\n", gsi, irq); + xen_irq_init(irq); return irq; } static void xen_free_irq(unsigned irq) { + struct irq_info *info = get_irq_data(irq); + + list_del(&info->list); + + set_irq_data(irq, NULL); + + kfree(info); + /* Legacy IRQ descriptors are managed by the arch. */ if (irq < NR_IRQS_LEGACY) return; @@ -563,51 +595,39 @@ static void ack_pirq(struct irq_data *data) static int find_irq_by_gsi(unsigned gsi) { - int irq; + struct irq_info *info; - for (irq = 0; irq < nr_irqs; irq++) { - struct irq_info *info = info_for_irq(irq); - - if (info == NULL || info->type != IRQT_PIRQ) + list_for_each_entry(info, &xen_irq_list_head, list) { + if (info->type != IRQT_PIRQ) continue; - if (gsi_from_irq(irq) == gsi) - return irq; + if (info->u.pirq.gsi == gsi) + return info->irq; } return -1; } -int xen_allocate_pirq(unsigned gsi, int shareable, char *name) +int xen_allocate_pirq_gsi(unsigned gsi) { - return xen_map_pirq_gsi(gsi, gsi, shareable, name); + return gsi; } -/* xen_map_pirq_gsi might allocate irqs from the top down, as a - * consequence don't assume that the irq number returned has a low value - * or can be used as a pirq number unless you know otherwise. - * - * One notable exception is when xen_map_pirq_gsi is called passing an - * hardware gsi as argument, in that case the irq number returned - * matches the gsi number passed as second argument. +/* + * Do not make any assumptions regarding the relationship between the + * IRQ number returned here and the Xen pirq argument. * * Note: We don't assign an event channel until the irq actually started * up. Return an existing irq if we've already got one for the gsi. */ -int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) +int xen_bind_pirq_gsi_to_irq(unsigned gsi, + unsigned pirq, int shareable, char *name) { - int irq = 0; + int irq = -1; struct physdev_irq irq_op; spin_lock(&irq_mapping_update_lock); - if ((pirq > nr_irqs) || (gsi > nr_irqs)) { - printk(KERN_WARNING "xen_map_pirq_gsi: %s %s is incorrect!\n", - pirq > nr_irqs ? "pirq" :"", - gsi > nr_irqs ? "gsi" : ""); - goto out; - } - irq = find_irq_by_gsi(gsi); if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", @@ -616,6 +636,8 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) } irq = xen_allocate_irq_gsi(gsi); + if (irq < 0) + goto out; set_irq_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); @@ -633,9 +655,8 @@ int xen_map_pirq_gsi(unsigned pirq, unsigned gsi, int shareable, char *name) goto out; } - irq_info[irq] = mk_pirq_info(0, pirq, gsi, irq_op.vector); - irq_info[irq].u.pirq.flags |= shareable ? PIRQ_SHAREABLE : 0; - pirq_to_irq[pirq] = irq; + xen_irq_info_pirq_init(irq, 0, pirq, gsi, irq_op.vector, + shareable ? PIRQ_SHAREABLE : 0); out: spin_unlock(&irq_mapping_update_lock); @@ -672,9 +693,8 @@ int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, set_irq_chip_and_handler_name(irq, &xen_pirq_chip, handle_level_irq, name); - irq_info[irq] = mk_pirq_info(0, pirq, 0, vector); - pirq_to_irq[pirq] = irq; - ret = set_irq_msi(irq, msidesc); + xen_irq_info_pirq_init(irq, 0, pirq, 0, vector, 0); + ret = irq_set_msi_desc(irq, msidesc); if (ret < 0) goto error_irq; out: @@ -709,9 +729,6 @@ int xen_destroy_irq(int irq) goto out; } } - pirq_to_irq[info->u.pirq.pirq] = -1; - - irq_info[irq] = mk_unbound_info(); xen_free_irq(irq); @@ -720,19 +737,26 @@ out: return rc; } -int xen_vector_from_irq(unsigned irq) +int xen_irq_from_pirq(unsigned pirq) { - return vector_from_irq(irq); -} + int irq; -int xen_gsi_from_irq(unsigned irq) -{ - return gsi_from_irq(irq); -} + struct irq_info *info; -int xen_irq_from_pirq(unsigned pirq) -{ - return pirq_to_irq[pirq]; + spin_lock(&irq_mapping_update_lock); + + list_for_each_entry(info, &xen_irq_list_head, list) { + if (info == NULL || info->type != IRQT_PIRQ) + continue; + irq = info->irq; + if (info->u.pirq.pirq == pirq) + goto out; + } + irq = -1; +out: + spin_unlock(&irq_mapping_update_lock); + + return irq; } int bind_evtchn_to_irq(unsigned int evtchn) @@ -745,14 +769,16 @@ int bind_evtchn_to_irq(unsigned int evtchn) if (irq == -1) { irq = xen_allocate_irq_dynamic(); + if (irq == -1) + goto out; set_irq_chip |