Merge tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull Xen updates from Konrad Rzeszutek Wilk: "Two major features that Xen community is excited about: The first is event channel scalability by David Vrabel - we switch over from an two-level per-cpu bitmap of events (IRQs) - to an FIFO queue with priorities. This lets us be able to handle more events, have lower latency, and better scalability. Good stuff. The other is PVH by Mukesh Rathor. In short, PV is a mode where the kernel lets the hypervisor program page-tables, segments, etc. With EPT/NPT capabilities in current processors, the overhead of doing this in an HVM (Hardware Virtual Machine) container is much lower than the hypervisor doing it for us. In short we let a PV guest run without doing page-table, segment, syscall, etc updates through the hypervisor - instead it is all done within the guest container. It is a "hybrid" PV - hence the 'PVH' name - a PV guest within an HVM container. The major benefits are less code to deal with - for example we only use one function from the the pv_mmu_ops (which has 39 function calls); faster performance for syscall (no context switches into the hypervisor); less traps on various operations; etc. It is still being baked - the ABI is not yet set in stone. But it is pretty awesome and we are excited about it. Lastly, there are some changes to ARM code - you should get a simple conflict which has been resolved in #linux-next. In short, this pull has awesome features. Features: - FIFO event channels. Key advantages: support for over 100,000 events (2^17), 16 different event priorities, improved fairness in event latency through the use of FIFOs. - Xen PVH support. "It’s a fully PV kernel mode, running with paravirtualized disk and network, paravirtualized interrupts and timers, no emulated devices of any kind (and thus no qemu), no BIOS or legacy boot — but instead of requiring PV MMU, it uses the HVM hardware extensions to virtualize the pagetables, as well as system calls and other privileged operations." (from "The Paravirtualization Spectrum, Part 2: From poles to a spectrum") Bug-fixes: - Fixes in balloon driver (refactor and make it work under ARM) - Allow xenfb to be used in HVM guests. - Allow xen_platform_pci=0 to work properly. - Refactors in event channels" * tag 'stable/for-linus-3.14-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip: (52 commits) xen/pvh: Set X86_CR0_WP and others in CR0 (v2) MAINTAINERS: add git repository for Xen xen/pvh: Use 'depend' instead of 'select'. xen: delete new instances of __cpuinit usage xen/fb: allow xenfb initialization for hvm guests xen/evtchn_fifo: fix error return code in evtchn_fifo_setup() xen-platform: fix error return code in platform_pci_init() xen/pvh: remove duplicated include from enlighten.c xen/pvh: Fix compile issues with xen_pvh_domain() xen: Use dev_is_pci() to check whether it is pci device xen/grant-table: Force to use v1 of grants. xen/pvh: Support ParaVirtualized Hardware extensions (v3). xen/pvh: Piggyback on PVHVM XenBus. xen/pvh: Piggyback on PVHVM for grant driver (v4) xen/grant: Implement an grant frame array struct (v3). xen/grant-table: Refactor gnttab_init xen/grants: Remove gnttab_max_grant_frames dependency on gnttab_init. xen/pvh: Piggyback on PVHVM for event channels (v2) xen/pvh: Update E820 to work with PVH (v2) xen/pvh: Secondary VCPU bringup (non-bootup CPUs) ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-22 22:00:18 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-22 22:00:18 -0800
commit: 84621c9b18d0bb6cb267e3395c7f3131ecf4d39c (patch)
tree: 28566fe0211798143136b5cd154e2239d38a7b68 /drivers/xen
parent: 7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (diff)
parent: c9f6e9977e38de15da96b732a8dec0ef56cbf977 (diff)
16 files changed, 1328 insertions, 551 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 12ba6db65142..38fb36e1c592 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -3,7 +3,6 @@ menu "Xen driver support"
 
 config XEN_BALLOON
 	bool "Xen memory balloon driver"
-	depends on !ARM
 	default y
 	help
 	  The balloon driver allows the Xen domain to request more memory from
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 14fe79d8634a..d75c811bfa56 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -2,7 +2,8 @@ ifeq ($(filter y, $(CONFIG_ARM) $(CONFIG_ARM64)),)
 obj-$(CONFIG_HOTPLUG_CPU)		+= cpu_hotplug.o
 endif
 obj-$(CONFIG_X86)			+= fallback.o
-obj-y	+= grant-table.o features.o events.o balloon.o manage.o
+obj-y	+= grant-table.o features.o balloon.o manage.o
+obj-y	+= events/
 obj-y	+= xenbus/
 
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4c02e2b94103..37d06ea624aa 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -157,13 +157,6 @@ static struct page *balloon_retrieve(bool prefer_highmem)
 	return page;
 }
 
-static struct page *balloon_first_page(void)
-{
-	if (list_empty(&ballooned_pages))
-		return NULL;
-	return list_entry(ballooned_pages.next, struct page, lru);
-}
-
 static struct page *balloon_next_page(struct page *page)
 {
 	struct list_head *next = page->lru.next;
@@ -328,7 +321,7 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
 	if (nr_pages > ARRAY_SIZE(frame_list))
 		nr_pages = ARRAY_SIZE(frame_list);
 
-	page = balloon_first_page();
+	page = list_first_entry_or_null(&ballooned_pages, struct page, lru);
 	for (i = 0; i < nr_pages; i++) {
 		if (!page) {
 			nr_pages = i;
diff --git a/drivers/xen/dbgp.c b/drivers/xen/dbgp.c
index f3ccc80a455f..8145a59fd9f6 100644
--- a/drivers/xen/dbgp.c
+++ b/drivers/xen/dbgp.c
@@ -19,7 +19,7 @@ static int xen_dbgp_op(struct usb_hcd *hcd, int op)
 	dbgp.op = op;
 
 #ifdef CONFIG_PCI
-	if (ctrlr->bus == &pci_bus_type) {
+	if (dev_is_pci(ctrlr)) {
 		const struct pci_dev *pdev = to_pci_dev(ctrlr);
 
 		dbgp.u.pci.seg = pci_domain_nr(pdev->bus);
diff --git a/drivers/xen/events/Makefile b/drivers/xen/events/Makefile
new file mode 100644
index 000000000000..62be55cd981d
--- /dev/null
+++ b/drivers/xen/events/Makefile
@@ -0,0 +1,5 @@
+obj-y += events.o
+
+events-y += events_base.o
+events-y += events_2l.o
+events-y += events_fifo.o
diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c
new file mode 100644
index 000000000000..d7ff91757307
--- /dev/null
+++ b/drivers/xen/events/events_2l.c
@@ -0,0 +1,372 @@
+/*
+ * Xen event channels (2-level ABI)
+ *
+ * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/linkage.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+
+#include <asm/sync_bitops.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/events.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+
+#include "events_internal.h"
+
+/*
+ * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
+ * careful to only use bitops which allow for this (e.g
+ * test_bit/find_first_bit and friends but not __ffs) and to pass
+ * BITS_PER_EVTCHN_WORD as the bitmask length.
+ */
+#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
+/*
+ * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
+ * array. Primarily to avoid long lines (hence the terse name).
+ */
+#define BM(x) (unsigned long *)(x)
+/* Find the first set bit in a evtchn mask */
+#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
+
+static DEFINE_PER_CPU(xen_ulong_t [EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD],
+		      cpu_evtchn_mask);
+
+static unsigned evtchn_2l_max_channels(void)
+{
+	return EVTCHN_2L_NR_CHANNELS;
+}
+
+static void evtchn_2l_bind_to_cpu(struct irq_info *info, unsigned cpu)
+{
+	clear_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, info->cpu)));
+	set_bit(info->evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
+static void evtchn_2l_clear_pending(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static void evtchn_2l_set_pending(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	sync_set_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_is_pending(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+}
+
+static bool evtchn_2l_test_and_set_mask(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_mask(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	sync_set_bit(port, BM(&s->evtchn_mask[0]));
+}
+
+static void evtchn_2l_unmask(unsigned port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	unsigned int cpu = get_cpu();
+	int do_hypercall = 0, evtchn_pending = 0;
+
+	BUG_ON(!irqs_disabled());
+
+	if (unlikely((cpu != cpu_from_evtchn(port))))
+		do_hypercall = 1;
+	else {
+		/*
+		 * Need to clear the mask before checking pending to
+		 * avoid a race with an event becoming pending.
+		 *
+		 * EVTCHNOP_unmask will only trigger an upcall if the
+		 * mask bit was set, so if a hypercall is needed
+		 * remask the event.
+		 */
+		sync_clear_bit(port, BM(&s->evtchn_mask[0]));
+		evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
+
+		if (unlikely(evtchn_pending && xen_hvm_domain())) {
+			sync_set_bit(port, BM(&s->evtchn_mask[0]));
+			do_hypercall = 1;
+		}
+	}
+
+	/* Slow path (hypercall) if this is a non-local port or if this is
+	 * an hvm domain and an event is pending (hvm domains don't have
+	 * their own implementation of irq_enable). */
+	if (do_hypercall) {
+		struct evtchn_unmask unmask = { .port = port };
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+	} else {
+		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+		/*
+		 * The following is basically the equivalent of
+		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
+		 * the interrupt edge' if the channel is masked.
+		 */
+		if (evtchn_pending &&
+		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
+					   BM(&vcpu_info->evtchn_pending_sel)))
+			vcpu_info->evtchn_upcall_pending = 1;
+	}
+
+	put_cpu();
+}
+
+static DEFINE_PER_CPU(unsigned int, current_word_idx);
+static DEFINE_PER_CPU(unsigned int, current_bit_idx);
+
+/*
+ * Mask out the i least significant bits of w
+ */
+#define MASK_LSBS(w, i) (w & ((~((xen_ulong_t)0UL)) << i))
+
+static inline xen_ulong_t active_evtchns(unsigned int cpu,
+					 struct shared_info *sh,
+					 unsigned int idx)
+{
+	return sh->evtchn_pending[idx] &
+		per_cpu(cpu_evtchn_mask, cpu)[idx] &
+		~sh->evtchn_mask[idx];
+}
+
+/*
+ * Search the CPU's pending events bitmasks.  For each one found, map
+ * the event number to an irq, and feed it into do_IRQ() for handling.
+ *
+ * Xen uses a two-level bitmap to speed searching.  The first level is
+ * a bitset of words which contain pending event bits.  The second
+ * level is a bitset of pending events themselves.
+ */
+static void evtchn_2l_handle_events(unsigned cpu)
+{
+	int irq;
+	xen_ulong_t pending_words;
+	xen_ulong_t pending_bits;
+	int start_word_idx, start_bit_idx;
+	int word_idx, bit_idx;
+	int i;
+	struct irq_desc *desc;
+	struct shared_info *s = HYPERVISOR_shared_info;
+	struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
+
+	/* Timer interrupt has highest priority. */
+	irq = irq_from_virq(cpu, VIRQ_TIMER);
+	if (irq != -1) {
+		unsigned int evtchn = evtchn_from_irq(irq);
+		word_idx = evtchn / BITS_PER_LONG;
+		bit_idx = evtchn % BITS_PER_LONG;
+		if (active_evtchns(cpu, s, word_idx) & (1ULL << bit_idx)) {
+			desc = irq_to_desc(irq);
+			if (desc)
+				generic_handle_irq_desc(irq, desc);
+		}
+	}
+
+	/*
+	 * Master flag must be cleared /before/ clearing
+	 * selector flag. xchg_xen_ulong must contain an
+	 * appropriate barrier.
+	 */
+	pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0);
+
+	start_word_idx = __this_cpu_read(current_word_idx);
+	start_bit_idx = __this_cpu_read(current_bit_idx);
+
+	word_idx = start_word_idx;
+
+	for (i = 0; pending_words != 0; i++) {
+		xen_ulong_t words;
+
+		words = MASK_LSBS(pending_words, word_idx);
+
+		/*
+		 * If we masked out all events, wrap to beginning.
+		 */
+		if (words == 0) {
+			word_idx = 0;
+			bit_idx = 0;
+			continue;
+		}
+		word_idx = EVTCHN_FIRST_BIT(words);
+
+		pending_bits = active_evtchns(cpu, s, word_idx);
+		bit_idx = 0; /* usually scan entire word from start */
+		/*
+		 * We scan the starting word in two parts.
+		 *
+		 * 1st time: start in the middle, scanning the
+		 * upper bits.
+		 *
+		 * 2nd time: scan the whole word (not just the
+		 * parts skipped in the first pass) -- if an
+		 * event in the previously scanned bits is
+		 * pending again it would just be scanned on
+		 * the next loop anyway.
+		 */
+		if (word_idx == start_word_idx) {
+			if (i == 0)
+				bit_idx = start_bit_idx;
+		}
+
+		do {
+			xen_ulong_t bits;
+			int port;
+
+			bits = MASK_LSBS(pending_bits, bit_idx);
+
+			/* If we masked out all events, move on. */
+			if (bits == 0)
+				break;
+
+			bit_idx = EVTCHN_FIRST_BIT(bits);
+
+			/* Process port. */
+			port = (word_idx * BITS_PER_EVTCHN_WORD) + bit_idx;
+			irq = get_evtchn_to_irq(port);
+
+			if (irq != -1) {
+				desc = irq_to_desc(irq);
+				if (desc)
+					generic_handle_irq_desc(irq, desc);
+			}
+
+			bit_idx = (bit_idx + 1) % BITS_PER_EVTCHN_WORD;
+
+			/* Next caller starts at last processed + 1 */
+			__this_cpu_write(current_word_idx,
+					 bit_idx ? word_idx :
+					 (word_idx+1) % BITS_PER_EVTCHN_WORD);
+			__this_cpu_write(current_bit_idx, bit_idx);
+		} while (bit_idx != 0);
+
+		/* Scan start_l1i twice; all others once. */
+		if ((word_idx != start_word_idx) || (i != 0))
+			pending_words &= ~(1UL << word_idx);
+
+		word_idx = (word_idx + 1) % BITS_PER_EVTCHN_WORD;
+	}
+}
+
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+	struct shared_info *sh = HYPERVISOR_shared_info;
+	int cpu = smp_processor_id();
+	xen_ulong_t *cpu_evtchn = per_cpu(cpu_evtchn_mask, cpu);
+	int i;
+	unsigned long flags;
+	static DEFINE_SPINLOCK(debug_lock);
+	struct vcpu_info *v;
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("\nvcpu %d\n  ", cpu);
+
+	for_each_online_cpu(i) {
+		int pending;
+		v = per_cpu(xen_vcpu, i);
+		pending = (get_irq_regs() && i == cpu)
+			? xen_irqs_disabled(get_irq_regs())
+			: v->evtchn_upcall_mask;
+		printk("%d: masked=%d pending=%d event_sel %0*"PRI_xen_ulong"\n  ", i,
+		       pending, v->evtchn_upcall_pending,
+		       (int)(sizeof(v->evtchn_pending_sel)*2),
+		       v->evtchn_pending_sel);
+	}
+	v = per_cpu(xen_vcpu, cpu);
+
+	printk("\npending:\n   ");
+	for (i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)sizeof(sh->evtchn_pending[0])*2,
+		       sh->evtchn_pending[i],
+		       i % 8 == 0 ? "\n   " : " ");
+	printk("\nglobal mask:\n   ");
+	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(sh->evtchn_mask[0])*2),
+		       sh->evtchn_mask[i],
+		       i % 8 == 0 ? "\n   " : " ");
+
+	printk("\nglobally unmasked:\n   ");
+	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(sh->evtchn_mask[0])*2),
+		       sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+		       i % 8 == 0 ? "\n   " : " ");
+
+	printk("\nlocal cpu%d mask:\n   ", cpu);
+	for (i = (EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD)-1; i >= 0; i--)
+		printk("%0*"PRI_xen_ulong"%s", (int)(sizeof(cpu_evtchn[0])*2),
+		       cpu_evtchn[i],
+		       i % 8 == 0 ? "\n   " : " ");
+
+	printk("\nlocally unmasked:\n   ");
+	for (i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--) {
+		xen_ulong_t pending = sh->evtchn_pending[i]
+			& ~sh->evtchn_mask[i]
+			& cpu_evtchn[i];
+		printk("%0*"PRI_xen_ulong"%s",
+		       (int)(sizeof(sh->evtchn_mask[0])*2),
+		       pending, i % 8 == 0 ? "\n   " : " ");
+	}
+
+	printk("\npending list:\n");
+	for (i = 0; i < EVTCHN_2L_NR_CHANNELS; i++) {
+		if (sync_test_bit(i, BM(sh->evtchn_pending))) {
+			int word_idx = i / BITS_PER_EVTCHN_WORD;
+			printk("  %d: event %d -> irq %d%s%s%s\n",
+			       cpu_from_evtchn(i), i,
+			       get_evtchn_to_irq(i),
+			       sync_test_bit(word_idx, BM(&v->evtchn_pending_sel))
+			       ? "" : " l2-clear",
+			       !sync_test_bit(i, BM(sh->evtchn_mask))
+			       ? "" : " globally-masked",
+			       sync_test_bit(i, BM(cpu_evtchn))
+			       ? "" : " locally-masked");
+		}
+	}
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
+static const struct evtchn_ops evtchn_ops_2l = {
+	.max_channels      = evtchn_2l_max_channels,
+	.nr_channels       = evtchn_2l_max_channels,
+	.bind_to_cpu       = evtchn_2l_bind_to_cpu,
+	.clear_pending     = evtchn_2l_clear_pending,
+	.set_pending       = evtchn_2l_set_pending,
+	.is_pending        = evtchn_2l_is_pending,
+	.test_and_set_mask = evtchn_2l_test_and_set_mask,
+	.mask              = evtchn_2l_mask,
+	.unmask            = evtchn_2l_unmask,
+	.handle_events     = evtchn_2l_handle_events,
+};
+
+void __init xen_evtchn_2l_init(void)
+{
+	pr_info("Using 2-level ABI\n");
+	evtchn_ops = &evtchn_ops_2l;
+}
diff --git a/drivers/xen/events.c b/drivers/xen/events/events_base.c
index 4035e833ea26..4672e003c0ad 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events/events_base.c
@@ -59,6 +59,10 @@
 #include <xen/interface/vcpu.h>
 #include <asm/hw_irq.h>
 
+#include "events_internal.h"
+
+const struct evtchn_ops *evtchn_ops;
+
 /*
  * This lock protects updates to the following mapping and reference-count
  * arrays. The lock does not need to be acquired to read the mapping tables.
@@ -73,71 +77,15 @@ static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
 /* IRQ <-> IPI mapping */
 static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1};
 
-/* Interrupt types. */
-enum xen_irq_type {
-	IRQT_UNBOUND = 0,
-	IRQT_PIRQ,
-	IRQT_VIRQ,
-	IRQT_IPI,
-	IRQT_EVTCHN
-};
-
-/*
- * Packed IRQ information:
- * type - enum xen_irq_type
- * event channel - irq->event channel mapping
- * cpu - cpu this event channel is bound to
- * index - type-specific information:
- *    PIRQ - physical IRQ, GSI, flags, and owner domain
- *    VIRQ - virq number
- *    IPI - IPI vector
- *    EVTCHN -
- */
-struct irq_info {
-	struct list_head list;
-	int refcnt;
-	enum xen_irq_type type;	/* type */
-	unsigned irq;
-	unsigned short evtchn;	/* event channel */
-	unsigned short cpu;	/* cpu bound */
-
-	union {
-		unsigned short virq;
-		enum ipi_vector ipi;
-		struct {
-			unsigned short pirq;
-			unsigned short gsi;
-			unsigned char flags;
-			uint16_t domid;
-		} pirq;
-	} u;
-};
-#define PIRQ_NEEDS_EOI	(1 << 0)
-#define PIRQ_SHAREABLE	(1 << 1)
-
-static int *evtchn_to_irq;
+int **evtchn_to_irq;
 #ifdef CONFIG_X86
 static unsigned long *pirq_eoi_map;
 #endif
 static bool (*pirq_needs_eoi)(unsigned irq);
 
-/*
- * Note sizeof(xen_ulong_t) can be more than sizeof(unsigned long). Be
- * careful to only use bitops which allow for this (e.g
- * test_bit/find_first_bit and friends but not __ffs) and to pass
- * BITS_PER_EVTCHN_WORD as the bitmask length.
- */
-#define BITS_PER_EVTCHN_WORD (sizeof(xen_ulong_t)*8)
-/*
- * Make a bitmask (i.e. unsigned long *) of a xen_ulong_t
- * array. Primarily to avoid long lines (hence the terse name).
- */
-#define BM(x) (unsigned long *)(x)
-/* Find the first set bit in a evtchn mask */
-#define EVTCHN_FIRST_BIT(w) find_first_bit(BM(&(w)), BITS_PER_EVTCHN_WORD)
-
-static DEFINE_PER_CPU(xen_ulong_t [NR_EVENT_CHANNELS/BITS_PER_EVTCHN_WORD],
-		      cpu_evtchn_mask);
+#define EVTCHN_ROW(e)  (e / (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_COL(e)  (e % (PAGE_SIZE/sizeof(**evtchn_to_irq)))
+#define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq))
 
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
@@ -148,19 +96,75 @@ static struct irq_chip xen_pirq_chip;
 static void enable_dynirq(struct irq_data *data);
 static void disable_dynirq(struct irq_data *data);
 
+static void clear_evtchn_to_irq_row(unsigned row)
+{
+	unsigned col;
+
+	for (col = 0; col < EVTCHN_PER_ROW; col++)
+		evtchn_to_irq[row][col] = -1;
+}
+
+static void clear_evtchn_to_irq_all(void)
+{
+	unsigned row;
+
+	for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) {
+		if (evtchn_to_irq[row] == NULL)
+			continue;
+		clear_evtchn_to_irq_row(row);
+	}
+}
+
+static int set_evtchn_to_irq(unsigned evtchn, unsigned irq)
+{
+	unsigned row;
+	unsigned col;
+
+	if (evtchn >= xen_evtchn_max_channels())
+		return -EINVAL;
+
+	row = EVTCHN_ROW(evtchn);
+	col = EVTCHN_COL(evtchn);
+
+	if (evtchn_to_irq[row] == NULL) {
+		/* Unallocated irq entries return -1 anyway */
+		if (irq == -1)
+			return 0;
+
+		evtchn_to_irq[row] = (int *)get_zeroed_page(GFP_KERNEL);
+		if (evtchn_to_irq[row] == NULL)
+			return -ENOMEM;
+
+		clear_evtchn_to_irq_row(row);
+	}
+
+	evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)] = irq;
+	return 0;
+}
+
+int get_evtchn_to_irq(unsigned evtchn)
+{
+	if (evtchn >= xen_evtchn_max_channels())
+		return -1;
+	if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
+		return -1;
+	return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
+}
+
 /* Get info for IRQ */
-static struct irq_info *info_for_irq(unsigned irq)
+struct irq_info *info_for_irq(unsigned irq)
 {
 	return irq_get_handler_data(irq);
 }
 
 /* Constructors for packed IRQ information. */
-static void xen_irq_info_common_init(struct irq_info *info,
+static int xen_irq_info_common_setup(struct irq_info *info,
 				     unsigned irq,
 				     enum xen_irq_type type,
-				     unsigned short evtchn,
+				     unsigned evtchn,
 				     unsigned short cpu)
 {
+	int ret;
 
 	BUG_ON(info->type != IRQT_UNBOUND && info->type != type);
 
@@ -169,68 +173,78 @@ static void xen_irq_info_common_init(struct irq_info *info,
 	info->evtchn = evtchn;
 	info->cpu = cpu;
 
-	evtchn_to_irq[evtchn] = irq;
+	ret = set_evtchn_to_irq(evtchn, irq);
+	if (ret < 0)
+		return ret;
 
 	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
+
+	return xen_evtchn_port_setup(info);
 }
 
-static void xen_irq_info_evtchn_init(unsigned irq,
-				     unsigned short evtchn)
+static int xen_irq_info_evtchn_setup(unsigned irq,
+				     unsigned evtchn)
 {
 	struct irq_info *info = info_for_irq(irq);
 
-	xen_irq_info_common_init(info, irq, IRQT_EVTCHN, evtchn, 0);
+	return xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
 }
 
-static void xen_irq_info_ipi_init(unsigned cpu,
+static int xen_irq_info_ipi_setup(unsigned cpu,
 				  unsigned irq,
-				  unsigned short evtchn,
+				  unsigned evtchn,
 				  enum ipi_vector ipi)
 {
 	struct irq_info *info = info_for_irq(irq);
 
-	xen_irq_info_common_init(info, irq, IRQT_IPI, evtchn, 0);
-
 	info->u.ipi = ipi;
 
 	per_cpu(ipi_to_irq, cpu)[ipi] = irq;
+
+	return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
 }
 
-static void xen_irq_info_virq_init(unsigned cpu,
+static int xen_irq_info_virq_setup(unsigned cpu,
 				   unsigned irq,
-				   unsigned short evtchn,
-				   unsigned short virq)
+				   unsigned evtchn,
+				   unsigned virq)
 {
 	struct irq_info *info = info_for_irq(irq);
 
-	xen_irq_info_common_init(info, irq, IRQT_VIRQ, evtchn, 0);
-
 	info->u.virq = virq;
 
 	per_cpu(virq_to_irq, cpu)[virq] = irq;
+
+	return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
 }
 
-static void xen_irq_info_pirq_init(unsigned irq,
-				   unsigned short evtchn,
-				   unsigned short pirq,
-				   unsigned short gsi,
+static int xen_irq_info_pirq_setup(unsigned irq,
+				   unsigned evtchn,
+				   unsigned pirq,
+				   unsigned gsi,
 				   uint16_t domid,
 				   unsigned char flags)
 {
 	struct irq_info *info = info_for_irq(irq);
 
-	xen_irq_info_common_init(info, irq, IRQT_PIRQ, evtchn, 0);
-
 	info->u.pirq.pirq = pirq;
 	info->u.pirq.gsi = gsi;
 	info->u.pirq.domid = domid;
 	info->u.pirq.flags = flags;
+
+	return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
+}
+
+static void xen_irq_info_cleanup(struct irq_info *info)
+{
+	set_evtchn_to_irq(info->evtchn, -1);
+	info->evtchn = 0;
 }
 
 /*
  * Accessors for packed IRQ information.
  */
-static unsigned int evtchn_from_irq(unsigned irq)
+unsigned int evtchn_from_irq(unsigned irq)
 {
 	if (unlikely(WARN(irq < 0 || irq >= nr_irqs, "Invalid irq %d!\n", irq)))
 		return 0;
@@ -240,10 +254,15 @@ static unsigned int evtchn_from_irq(unsigned irq)
 
 unsigned irq_from_evtchn(unsigned int evtchn)
 {
-	return evtchn_to_irq[evtchn];
+	return get_evtchn_to_irq(evtchn);
 }
 EXPORT_SYMBOL_GPL(irq_from_evtchn);
 
+int irq_from_virq(unsigned int cpu, unsigned int virq)
+{
+	return per_cpu(virq_to_irq, cpu)[virq];
+}
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
@@ -279,14 +298,14 @@ static enum xen_irq_type type_from_irq(unsigned irq)
 	return info_for_irq(irq)->type;
 }
 
-static unsigned cpu_from_irq(unsigned irq)
+unsigned cpu_from_irq(unsigned irq)
 {
 	return info_for_irq(irq)->cpu;
 }
 
-static unsigned int cpu_from_evtchn(unsigned int evtchn)
+unsigned int cpu_from_evtchn(unsigned int evtchn)
 {
-	int irq = evtchn_to_irq[evtchn];
+	int irq = get_evtchn_to_irq(evtchn);
 	unsigned ret = 0;
 
 	if (irq != -1)
@@ -310,67 +329,29 @@ static bool pirq_needs_eoi_flag(unsigned irq)
 	return info->u.pirq.flags & PIRQ_NEEDS_EOI;
 }
 
-static inline xen_ulong_t active_evtchns(unsigned int cpu,
-					 struct shared_info *sh,
-					 unsigned int idx)
-{
-	return sh->evtchn_pending[idx] &
-		per_cpu(cpu_evtchn_mask, cpu)[idx] &
-		~sh->evtchn_mask[idx];
-}
-
 static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
 {
-	int irq = evtchn_to_irq[chn];
+	int irq = get_evtchn_to_irq(chn);
+	struct irq_info *info = info_for_irq(irq);
 
 	BUG_ON(irq == -1);
 #ifdef CONFIG_SMP
 	cpumask_copy(irq_to_desc(irq)->irq_data.affinity, cpumask_of(cpu));
 #endif
 
-	clear_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu_from_irq(irq))));
-	set_bit(chn, BM(per_cpu(cpu_evtchn_mask, cpu)));
-
-	info_for_irq(irq)->cpu = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
-	int i;
-#ifdef CONFIG_SMP
-	struct irq_info *info;
-
-	/* By default all event channels notify CPU#0. */
-	list_for_each_entry(info, &xen_irq_list_head, list) {
-		struct irq_desc *desc = irq_to_desc(info->irq);
-		cpumask_copy(desc->irq_data.affinity, cpumask_of(0));
-	}
-#endif
-
-	for_each_possible_cpu(i)
-		memset(per_cpu(cpu_evtchn_mask, i),
-		       (i == 0) ? ~0 : 0, NR_EVENT_CHANNELS/8);
-}
+	xen_evtchn_port_bind_to_cpu(info, cpu);
 
-static inline void clear_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_clear_bit(port, BM(&s->evtchn_pending[0]));
+	info->cpu = cpu;
 }
 
-static inline void set_evtchn(int port)
+static void xen_evtchn_mask_all(void)
 {
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, BM(&s->evtchn_pending[0]));
-}
+	unsigned int evtchn;
 
-static inline int test_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	return sync_test_bit(port, BM(&s->evtchn_pending[0]));
+	for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++)
+		mask_evtchn(evtchn);
 }
 
-
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq
  * @irq: irq of event channel to send event to
@@ -388,63 +369,6 @@ void notify_remote_via_irq(int irq)
 }
 EXPORT_SYMBOL_GPL(notify_remote_via_irq);
 
-static void mask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
-static void unmask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	unsigned int cpu = get_cpu();
-	int do_hypercall = 0, evtchn_pending = 0;
-
-	BUG_ON(!irqs_disabled());
-
-	if (unlikely((cpu != cpu_from_evtchn(port))))
-		do_hypercall = 1;
-	else {
-		/*
-		 * Need to clear the mask before checking pending to
-		 * avoid a race with an event becoming pending.
-		 *
-		 * EVTCHNOP_unmask will only trigger an upcall if the
-		 * mask bit was set, so if a hypercall is needed
-		 * remask the event.
-		 */
-		sync_clear_bit(port, BM(&s->evtchn_mask[0]));
-		evtchn_pending = sync_test_bit(port, BM(&s->evtchn_pending[0]));
-
-		if (unlikely(evtchn_pending && xen_hvm_domain())) {
-			sync_set_bit(port, BM(&s->evtchn_mask[0]));
-			do_hypercall = 1;
-		}
-	}
-
-	/* Slow path (hypercall) if this is a non-local port or if this is
-	 * an hvm domain and an event is pending (hvm domains don't have
-	 * their own implementation of irq_enable). */
-	if (do_hypercall) {
-		struct evtchn_unmask unmask = { .port = port };
-		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
-	} else {
-		struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
-
-		/*
-		 * The following is basically the equivalent of
-		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
-		 * the interrupt edge' if the channel is masked.
-		 */
-		if (evtchn_pending &&
-		    !sync_test_and_set_bit(port / BITS_PER_EVTCHN_WORD,
-					   BM(&vcpu_info->evtchn_pending_sel)))
-			vcpu_info->evtchn_upcall_pending = 1;
-	}
-
-	put_cpu();
-}
-
 static void xen_irq_init(unsigned irq)
 {
 	struct irq_info *info;
@@ -538,6 +462,18 @@ static void xen_free_irq(unsigned irq)
 	irq_free_desc(irq);
 }
 
+static void xen_evtchn_close(unsigned int port)
+{
+	struct evtchn_close close;
+
+	close.port = port;
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+		BUG();
+
+	/* Closed ports are implicitly re-bound to VCPU0. */
+	bind_evtchn_to_cpu(port, 0);
+}
+
 static void pirq_query_unmask(int irq)
 {
 	struct physdev_irq_status_query irq_status;
@@ -610,7 +546,13 @@ static unsigned int __startup_pirq(unsigned int irq)
 
 	pirq_query_unmask(irq);
 
-	evtchn_to_irq[evtchn] = irq;
+	rc = set_evtchn_to_irq(evtchn, irq);
+	if (rc != 0) {
+		pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
+		       irq, rc);
+		xen_evtchn_close(evtchn);
+		return 0;
+	}
 	bind_evtchn_to_cpu(evtchn, 0);
 	info->evtchn = evtchn;
 
@@ -628,10 +570,9 @@ static unsigned int startup_pirq(struct irq_data *data)
 
 static void shutdown_pirq(struct irq_data *data)
 {
-	struct evtchn_close close;
 	unsigned int irq = data->irq;
 	struct irq_info *info = info_for_irq(irq);
-	int evtchn = evtchn_from_irq(irq);
+	unsigned evtchn = evtchn_from_irq(irq);
 
 	BUG_ON(info->type != IRQT_PIRQ);
 
@@ -639,14 +580,8 @@ static void shutdown_pirq(struct irq_data *data)
author	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-22 22:00:18 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-01-22 22:00:18 -0800
commit	84621c9b18d0bb6cb267e3395c7f3131ecf4d39c (patch)
tree	28566fe0211798143136b5cd154e2239d38a7b68 /drivers/xen
parent	7ebd3faa9b5b42caf2d5aa1352a93dcfa0098011 (diff)
parent	c9f6e9977e38de15da96b732a8dec0ef56cbf977 (diff)