summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2009-10-13 17:22:20 -0400
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2011-07-19 20:58:01 -0400
commit30edc14bf39afde24ef7db2de66c91805db80828 (patch)
tree1cf5b6f28a3ea4159a09bcef9d11be6d427e3558
parent56299378726d5f2ba8d3c8cbbd13cb280ba45e4f (diff)
xen/pciback: xen pci backend driver.
This is the host side counterpart to the frontend driver in drivers/pci/xen-pcifront.c. The PV protocol is also implemented by frontend drivers in other OSes, such as the BSDs. The PV protocol is rather simple. There is a page shared with the guest, which has the 'struct xen_pci_sharedinfo' embedded in it. The backend has a thread that is kicked every time the structure is changed, and based on the operation field it performs specific tasks: XEN_PCI_OP_conf_[read|write]: Read/Write 0xCF8/0xCFC filtered data. (conf_space*.c) Based on which field is probed, we either enable/disable the PCI device, change power state, read VPD, etc. The major goal of this call is to provide a Physical IRQ (PIRQ) to the guest. The PIRQ is a Xen hypervisor global IRQ value, irrespective of whether the IRQ is tied to the IO-APIC or is a vector. For GSI type interrupts, PIRQ==GSI holds. For MSI/MSI-X the PIRQ value != Linux IRQ number (though PIRQ==vector). Please note that with Xen, all interrupts (except those level shared ones) are injected directly to the guest - there is no host interaction. XEN_PCI_OP_[enable|disable]_msi[|x] (pciback_ops.c) Enables/disables the MSI/MSI-X capability of the device. These operations set up the MSI/MSI-X vectors for the guest and pass them to the frontend. When the device is activated, the interrupts are directly injected into the guest without involving the host. XEN_PCI_OP_aer_[detected|resume|mmio|slotreset]: In case of failure, perform the appropriate AER commands on the guest. Right now that is a cop-out - we just kill the guest. Besides implementing those commands, it can also hide a PCI device from the host. When booting up, the user can specify xen-pciback.hide=(1:0:0)(BDF..) so that the host does not try to use the device. The driver was lifted from the linux-2.6.18.hg tree and fixed up so that it could compile under v3.0. Per suggestion from Jesse Barnes, moved the driver to drivers/xen/xen-pciback. 
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
-rw-r--r--drivers/xen/Kconfig43
-rw-r--r--drivers/xen/Makefile1
-rw-r--r--drivers/xen/xen-pciback/Makefile17
-rw-r--r--drivers/xen/xen-pciback/conf_space.c435
-rw-r--r--drivers/xen/xen-pciback/conf_space.h126
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.c66
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability.h26
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability_msi.c94
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability_pm.c113
-rw-r--r--drivers/xen/xen-pciback/conf_space_capability_vpd.c40
-rw-r--r--drivers/xen/xen-pciback/conf_space_header.c318
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.c140
-rw-r--r--drivers/xen/xen-pciback/conf_space_quirks.h35
-rw-r--r--drivers/xen/xen-pciback/controller.c442
-rw-r--r--drivers/xen/xen-pciback/passthrough.c178
-rw-r--r--drivers/xen/xen-pciback/pci_stub.c1285
-rw-r--r--drivers/xen/xen-pciback/pciback.h133
-rw-r--r--drivers/xen/xen-pciback/pciback_ops.c131
-rw-r--r--drivers/xen/xen-pciback/slot.c191
-rw-r--r--drivers/xen/xen-pciback/vpci.c244
-rw-r--r--drivers/xen/xen-pciback/xenbus.c709
21 files changed, 4767 insertions, 0 deletions
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a59638b37c1a..8af0792dfd67 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -105,4 +105,47 @@ config SWIOTLB_XEN
depends on PCI
select SWIOTLB
+config XEN_PCIDEV_BACKEND
+ tristate "Xen PCI-device backend driver"
+ depends on PCI && X86 && XEN
+ depends on XEN_BACKEND
+ help
+ The PCI device backend driver allows the kernel to export arbitrary
+ PCI devices to other guests. If you select this to be a module, you
+ will need to make sure no other driver has bound to the device(s)
+ you want to make visible to other guests.
+
+choice
+ prompt "PCI Backend Mode"
+ depends on XEN_PCIDEV_BACKEND
+
+config XEN_PCIDEV_BACKEND_VPCI
+ bool "Virtual PCI"
+ help
+ This PCI Backend hides the true PCI topology and makes the frontend
+ think there is a single PCI bus with only the exported devices on it.
+ For example, a device at 03:05.0 will be re-assigned to 00:00.0. A
+ second device at 02:1a.1 will be re-assigned to 00:01.1.
+
+config XEN_PCIDEV_BACKEND_PASS
+ bool "Passthrough"
+ help
+ This PCI Backend provides a real view of the PCI topology to the
+ frontend (for example, a device at 06:01.b will still appear at
+ 06:01.b to the frontend). This is similar to how Xen 2.0.x exposed
+ PCI devices to its driver domains. This may be required for drivers
+ which depend on finding their hardware in certain bus/slot
+ locations.
+
+endchoice
+
+config XEN_PCIDEV_BE_DEBUG
+ bool "Xen PCI Backend Debugging"
+ depends on XEN_PCIDEV_BACKEND
+ default n
+ help
+ Allows observing all of the traffic from the frontend/backend
+ when reading and writing to the configuration registers.
+ If in doubt, say no.
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index bbc18258ecc5..35a72ef3afac 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
diff --git a/drivers/xen/xen-pciback/Makefile b/drivers/xen/xen-pciback/Makefile
new file mode 100644
index 000000000000..106dae748cdb
--- /dev/null
+++ b/drivers/xen/xen-pciback/Makefile
@@ -0,0 +1,17 @@
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback.o
+
+pciback-y := pci_stub.o pciback_ops.o xenbus.o
+pciback-y += conf_space.o conf_space_header.o \
+ conf_space_capability.o \
+ conf_space_capability_vpd.o \
+ conf_space_capability_pm.o \
+ conf_space_quirks.o
+pciback-$(CONFIG_PCI_MSI) += conf_space_capability_msi.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_VPCI) += vpci.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT) += slot.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o
+pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o
+
+# Build with -DDEBUG when backend debugging is selected (ccflags-y replaces
+# the deprecated EXTRA_CFLAGS; the symbol is a bool, so it is 'y' or unset).
+ccflags-$(CONFIG_XEN_PCIDEV_BE_DEBUG) += -DDEBUG
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
new file mode 100644
index 000000000000..370c18e58d7a
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -0,0 +1,435 @@
+/*
+ * PCI Backend - Functions for creating a virtual configuration space for
+ * exported PCI Devices.
+ * It's dangerous to allow PCI Driver Domains to change their
+ * device's resources (memory, i/o ports, interrupts). We need to
+ * restrict changes to certain PCI Configuration registers:
+ * BARs, INTERRUPT_PIN, most registers in the header...
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_quirks.h"
+
+/*
+ * Driver-wide switch: when set, configuration space writes that no
+ * config_field intercepts are passed to the device instead of being dropped
+ * (see pciback_config_write()). Declared bool to match its module_param type.
+ */
+static bool permissive;
+module_param(permissive, bool, 0644);
+
+/*
+ * Stamp out pciback_<dir>_config_<width>() helpers. Each one forwards to the
+ * matching pci_<dir>_config_<width>() accessor and ignores its trailing
+ * 'data' argument, so it can be plugged in directly as a config_field
+ * read/write handler.
+ */
+#define DEFINE_PCI_CONFIG(dir, width, val_t)				\
+int pciback_##dir##_config_##width(struct pci_dev *dev, int offset,	\
+				   val_t value, void *data)		\
+{									\
+	return pci_##dir##_config_##width(dev, offset, value);		\
+}
+
+DEFINE_PCI_CONFIG(read, byte, u8 *)
+DEFINE_PCI_CONFIG(read, word, u16 *)
+DEFINE_PCI_CONFIG(read, dword, u32 *)
+
+DEFINE_PCI_CONFIG(write, byte, u8)
+DEFINE_PCI_CONFIG(write, word, u16)
+DEFINE_PCI_CONFIG(write, dword, u32)
+
+/*
+ * Invoke the read handler of @entry's field, choosing the byte/word/dword
+ * variant from field->size. *value is cleared first, so a field without a
+ * read handler (or with an unexpected size) yields 0 and a return of 0.
+ */
+static int conf_space_read(struct pci_dev *dev,
+			   const struct config_field_entry *entry,
+			   int offset, u32 *value)
+{
+	const struct config_field *field = entry->field;
+
+	*value = 0;
+
+	if (field->size == 1 && field->u.b.read)
+		return field->u.b.read(dev, offset, (u8 *) value,
+				       entry->data);
+
+	if (field->size == 2 && field->u.w.read)
+		return field->u.w.read(dev, offset, (u16 *) value,
+				       entry->data);
+
+	if (field->size == 4 && field->u.dw.read)
+		return field->u.dw.read(dev, offset, value, entry->data);
+
+	return 0;
+}
+
+/*
+ * Invoke the write handler of @entry's field, choosing the byte/word/dword
+ * variant from field->size and truncating @value accordingly. A field
+ * without a write handler swallows the write and 0 is returned.
+ */
+static int conf_space_write(struct pci_dev *dev,
+			    const struct config_field_entry *entry,
+			    int offset, u32 value)
+{
+	const struct config_field *field = entry->field;
+
+	if (field->size == 1 && field->u.b.write)
+		return field->u.b.write(dev, offset, (u8) value,
+					entry->data);
+
+	if (field->size == 2 && field->u.w.write)
+		return field->u.w.write(dev, offset, (u16) value,
+					entry->data);
+
+	if (field->size == 4 && field->u.dw.write)
+		return field->u.dw.write(dev, offset, value, entry->data);
+
+	return 0;
+}
+
+/* Bit mask covering an access of @size bytes; anything but 1 or 2 is 32-bit. */
+static inline u32 get_mask(int size)
+{
+	switch (size) {
+	case 1:
+		return 0xff;
+	case 2:
+		return 0xffff;
+	default:
+		return 0xffffffff;
+	}
+}
+
+/*
+ * A request is acceptable when it is 1, 2 or 4 bytes wide and naturally
+ * aligned. Returns 1 for a valid request, 0 otherwise.
+ */
+static inline int valid_request(int offset, int size)
+{
+	int size_ok = (size == 1 || size == 2 || size == 4);
+
+	/* size_ok is tested first, so the modulo never sees size == 0 */
+	return size_ok && (offset % size) == 0;
+}
+
+/*
+ * Overlay @new_val onto @val. @new_val and @new_val_mask are first moved by
+ * @offset bytes (left for a non-negative offset, right for a negative one);
+ * the bits selected by the moved mask then replace those of @val.
+ */
+static inline u32 merge_value(u32 val, u32 new_val, u32 new_val_mask,
+			      int offset)
+{
+	const int bits = offset * 8;
+	u32 mask, incoming;
+
+	if (offset >= 0) {
+		mask = new_val_mask << bits;
+		incoming = new_val << bits;
+	} else {
+		mask = new_val_mask >> -bits;
+		incoming = new_val >> -bits;
+	}
+
+	return (val & ~mask) | (incoming & mask);
+}
+
+/*
+ * Translate a PCIBIOS_* status into the matching XEN_PCI_ERR_* code.
+ * Any value that is not one of the known PCIBIOS_* codes is returned as is.
+ */
+static int pcibios_err_to_errno(int err)
+{
+	if (err == PCIBIOS_SUCCESSFUL)
+		return XEN_PCI_ERR_success;
+	if (err == PCIBIOS_DEVICE_NOT_FOUND)
+		return XEN_PCI_ERR_dev_not_found;
+	if (err == PCIBIOS_BAD_REGISTER_NUMBER)
+		return XEN_PCI_ERR_invalid_offset;
+	if (err == PCIBIOS_FUNC_NOT_SUPPORTED)
+		return XEN_PCI_ERR_not_implemented;
+	if (err == PCIBIOS_SET_FAILED)
+		return XEN_PCI_ERR_access_denied;
+
+	return err;
+}
+
+/*
+ * Read @size bytes at @offset of @dev's virtual configuration space.
+ *
+ * The raw hardware value is fetched first; every registered config field
+ * whose byte range intersects the request then replaces its part of the
+ * result with whatever its read handler reports.
+ *
+ * @size must be 1, 2 or 4 and @offset aligned to it, otherwise
+ * XEN_PCI_ERR_invalid_offset is returned and *ret_val is 0. *ret_val is
+ * always written. The return value is the last error seen, passed through
+ * pcibios_err_to_errno().
+ */
+int pciback_config_read(struct pci_dev *dev, int offset, int size,
+			u32 *ret_val)
+{
+	int err = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	int req_start, req_end, field_start, field_end;
+	/* if read fails for any reason, return 0
+	 * (as if device didn't respond) */
+	u32 value = 0, tmp_val;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x\n",
+		       pci_name(dev), size, offset);
+
+	if (!valid_request(offset, size)) {
+		err = XEN_PCI_ERR_invalid_offset;
+		goto out;
+	}
+
+	/* Get the real value first, then modify as appropriate */
+	switch (size) {
+	case 1:
+		err = pci_read_config_byte(dev, offset, (u8 *) &value);
+		break;
+	case 2:
+		err = pci_read_config_word(dev, offset, (u16 *) &value);
+		break;
+	case 4:
+		err = pci_read_config_dword(dev, offset, &value);
+		break;
+	}
+
+	req_start = offset;
+	req_end = offset + size;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		/*
+		 * Half-open interval intersection. Checking only whether an
+		 * end point of the request falls inside the field misses a
+		 * field that lies strictly inside a wider request (e.g. a
+		 * one-byte field in the middle of a dword read), which would
+		 * let the unfiltered hardware value through.
+		 */
+		if (req_end > field_start && field_end > req_start) {
+			err = conf_space_read(dev, cfg_entry, field_start,
+					      &tmp_val);
+			if (err)
+				goto out;
+
+			value = merge_value(value, tmp_val,
+					    get_mask(field->size),
+					    field_start - req_start);
+		}
+	}
+
+out:
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG "pciback: %s: read %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	*ret_val = value;
+	return pcibios_err_to_errno(err);
+}
+
+/*
+ * Write @size bytes of @value at @offset of @dev's virtual configuration
+ * space.
+ *
+ * For every registered config field whose byte range intersects the request,
+ * the field's current virtual value is read, the written bytes are merged in
+ * and the result is handed to the field's write handler. If no field
+ * intersects the request, the write reaches the hardware only when the
+ * device or the driver is in permissive mode; otherwise it is dropped and a
+ * warning is logged once per device.
+ *
+ * @size must be 1, 2 or 4 and @offset aligned to it, otherwise
+ * XEN_PCI_ERR_invalid_offset is returned. Other results go through
+ * pcibios_err_to_errno().
+ */
+int pciback_config_write(struct pci_dev *dev, int offset, int size, u32 value)
+{
+	int err = 0, handled = 0;
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *cfg_entry;
+	const struct config_field *field;
+	u32 tmp_val;
+	int req_start, req_end, field_start, field_end;
+
+	if (unlikely(verbose_request))
+		printk(KERN_DEBUG
+		       "pciback: %s: write request %d bytes at 0x%x = %x\n",
+		       pci_name(dev), size, offset, value);
+
+	if (!valid_request(offset, size))
+		return XEN_PCI_ERR_invalid_offset;
+
+	req_start = offset;
+	req_end = offset + size;
+
+	list_for_each_entry(cfg_entry, &dev_data->config_fields, list) {
+		field = cfg_entry->field;
+
+		field_start = OFFSET(cfg_entry);
+		field_end = OFFSET(cfg_entry) + field->size;
+
+		/*
+		 * Half-open interval intersection. Checking only whether an
+		 * end point of the request falls inside the field misses a
+		 * field that lies strictly inside a wider request; such a
+		 * write would be treated as unhandled and, in permissive
+		 * mode, go to the hardware without the field's filter.
+		 */
+		if (req_end > field_start && field_end > req_start) {
+			tmp_val = 0;
+
+			err = pciback_config_read(dev, field_start,
+						  field->size, &tmp_val);
+			if (err)
+				break;
+
+			tmp_val = merge_value(tmp_val, value, get_mask(size),
+					      req_start - field_start);
+
+			err = conf_space_write(dev, cfg_entry, field_start,
+					       tmp_val);
+
+			/* handled is set true here, but not every byte
+			 * may have been written! Properly detecting if
+			 * every byte is handled is unnecessary as the
+			 * flag is used to detect devices that need
+			 * special helpers to work correctly.
+			 */
+			handled = 1;
+		}
+	}
+
+	if (!handled && !err) {
+		/* By default, anything not specifically handled above is
+		 * read-only. The permissive flag changes this behavior so
+		 * that anything not specifically handled above is writable.
+		 * This means that some fields may still be read-only because
+		 * they have entries in the config_field list that intercept
+		 * the write and do nothing. */
+		if (dev_data->permissive || permissive) {
+			switch (size) {
+			case 1:
+				err = pci_write_config_byte(dev, offset,
+							    (u8) value);
+				break;
+			case 2:
+				err = pci_write_config_word(dev, offset,
+							    (u16) value);
+				break;
+			case 4:
+				err = pci_write_config_dword(dev, offset,
+							     (u32) value);
+				break;
+			}
+		} else if (!dev_data->warned_on_write) {
+			dev_data->warned_on_write = 1;
+			dev_warn(&dev->dev, "Driver tried to write to a "
+				 "read-only configuration space field at offset"
+				 " 0x%x, size %d. This may be harmless, but if "
+				 "you have problems with your device:\n"
+				 "1) see permissive attribute in sysfs\n"
+				 "2) report problems to the xen-devel "
+				 "mailing list along with details of your "
+				 "device obtained from lspci.\n", offset, size);
+		}
+	}
+
+	return pcibios_err_to_errno(err);
+}
+
+/*
+ * Drop every config field entry of @dev whose field has a 'clean' callback:
+ * the callback is run on the field, then the entry's data and the entry
+ * itself are freed. Entries without 'clean' are left on the list.
+ */
+void pciback_config_free_dyn_fields(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing dynamically allocated virtual "
+			   "configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *field = entry->field;
+
+		if (!field->clean)
+			continue;
+
+		field->clean((struct config_field *)field);
+
+		kfree(entry->data);
+
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/*
+ * Run the 'reset' callback, where present, of every config field registered
+ * for @dev. Does nothing if the device has no driver data.
+ */
+void pciback_config_reset_dev(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	const struct config_field_entry *entry;
+
+	dev_dbg(&dev->dev, "resetting virtual configuration space\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry(entry, &dev_data->config_fields, list) {
+		const struct config_field *field = entry->field;
+
+		if (!field->reset)
+			continue;
+
+		field->reset(dev, OFFSET(entry), entry->data);
+	}
+}
+
+/*
+ * Tear down the whole virtual configuration space of @dev: each entry is
+ * unlinked, its field's 'release' callback (if any) is given the entry data,
+ * and the entry is freed. Does nothing if the device has no driver data.
+ */
+void pciback_config_free_dev(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry, *next;
+
+	dev_dbg(&dev->dev, "free-ing virtual configuration space fields\n");
+	if (!dev_data)
+		return;
+
+	list_for_each_entry_safe(entry, next, &dev_data->config_fields, list) {
+		const struct config_field *field;
+
+		list_del(&entry->list);
+
+		field = entry->field;
+		if (field->release)
+			field->release(dev, OFFSET(entry), entry->data);
+
+		kfree(entry);
+	}
+}
+
+/*
+ * Register @field for @dev at @base_offset + field->offset.
+ *
+ * A list entry is allocated, the field's 'init' callback (if any) is run and
+ * its result kept as the entry's private data, and the entry is appended to
+ * the device's config field list.
+ *
+ * Returns 0 on success, -ENOMEM if the entry cannot be allocated, the
+ * non-zero result of pciback_field_is_dup() if that check fires, or the
+ * error encoded in the pointer returned by 'init'. On any failure the entry
+ * is freed and nothing is added.
+ */
+int pciback_config_add_field_offset(struct pci_dev *dev,
+				    const struct config_field *field,
+				    unsigned int base_offset)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	struct config_field_entry *entry;
+	void *priv;
+	int err;
+
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->data = NULL;
+	entry->field = field;
+	entry->base_offset = base_offset;
+
+	/* silently ignore duplicate fields */
+	err = pciback_field_is_dup(dev, OFFSET(entry));
+	if (err)
+		goto free_entry;
+
+	if (field->init) {
+		priv = field->init(dev, OFFSET(entry));
+		if (IS_ERR(priv)) {
+			err = PTR_ERR(priv);
+			goto free_entry;
+		}
+
+		entry->data = priv;
+	}
+
+	dev_dbg(&dev->dev, "added config field at offset 0x%02x\n",
+		OFFSET(entry));
+	list_add_tail(&entry->list, &dev_data->config_fields);
+
+	return 0;
+
+free_entry:
+	kfree(entry);
+	return err;
+}
+
+/* Set up the device's virtual configuration space so that certain registers
+ * (such as the base address registers, BARs) are tracked and the client
+ * cannot manipulate them directly. Header fields are added first, then
+ * capability fields, then quirks; the first failure is returned.
+ */
+int pciback_config_init_dev(struct pci_dev *dev)
+{
+	struct pciback_dev_data *dev_data = pci_get_drvdata(dev);
+	int err;
+
+	dev_dbg(&dev->dev, "initializing virtual configuration space\n");
+
+	INIT_LIST_HEAD(&dev_data->config_fields);
+
+	err = pciback_config_header_add_fields(dev);
+	if (err)
+		return err;
+
+	err = pciback_config_capability_add_fields(dev);
+	if (err)
+		return err;
+
+	return pciback_config_quirks_init(dev);
+}
+
+/* One-time setup of the config space layer: registers the capability overlays. */
+int pciback_config_init(void)
+{
+	int err = pciback_config_capability_init();
+
+	return err;
+}
diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h
new file mode 100644
index 000000000000..50ebef216828
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space.h
@@ -0,0 +1,126 @@
+/*
+ * PCI Backend - Common data structures for overriding the configuration space
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __XEN_PCIBACK_CONF_SPACE_H__
+#define __XEN_PCIBACK_CONF_SPACE_H__
+
+#include <linux/list.h>
+#include <linux/err.h>
+
+/* conf_field_init can return an errno in a ptr with ERR_PTR() */
+typedef void *(*conf_field_init) (struct pci_dev *dev, int offset);
+typedef void (*conf_field_reset) (struct pci_dev *dev, int offset, void *data);
+typedef void (*conf_field_free) (struct pci_dev *dev, int offset, void *data);
+
+typedef int (*conf_dword_write) (struct pci_dev *dev, int offset, u32 value,
+ void *data);
+typedef int (*conf_word_write) (struct pci_dev *dev, int offset, u16 value,
+ void *data);
+typedef int (*conf_byte_write) (struct pci_dev *dev, int offset, u8 value,
+ void *data);
+typedef int (*conf_dword_read) (struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+typedef int (*conf_word_read) (struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+typedef int (*conf_byte_read) (struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+
+/* These are the fields within the configuration space which we
+ * are interested in intercepting reads/writes to and changing their
+ * values.
+ *
+ * A field describes 'size' bytes at 'offset' (relative to the base offset of
+ * the entry it is registered under, see OFFSET()). 'size' also selects which
+ * member of the handler union 'u' is used: 1 -> b, 2 -> w, 4 -> dw. A table
+ * of fields is terminated by an element with size == 0.
+ */
+struct config_field {
+ unsigned int offset;
+ unsigned int size;
+ unsigned int mask;
+ conf_field_init init; /* optional; result becomes the entry's private data */
+ conf_field_reset reset; /* optional; run by pciback_config_reset_dev() */
+ conf_field_free release; /* optional; run by pciback_config_free_dev() */
+ void (*clean) (struct config_field *field); /* optional; run by pciback_config_free_dyn_fields() */
+ union {
+ struct {
+ conf_dword_write write;
+ conf_dword_read read;
+ } dw;
+ struct {
+ conf_word_write write;
+ conf_word_read read;
+ } w;
+ struct {
+ conf_byte_write write;
+ conf_byte_read read;
+ } b;
+ } u;
+ struct list_head list;
+};
+
+struct config_field_entry { /* one config_field registered for one device */
+ struct list_head list; /* link in the device's config_fields list */
+ const struct config_field *field; /* the field description being instantiated */
+ unsigned int base_offset; /* added to field->offset by OFFSET() */
+ void *data; /* value returned by field->init, or NULL; handed to the field's handlers */
+};
+
+#define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
+
+/* Add fields to a device - the add_fields macro expects to get a pointer to
+ * the first entry in an array (of which the ending is marked by size==0)
+ */
+int pciback_config_add_field_offset(struct pci_dev *dev,
+ const struct config_field *field,
+ unsigned int offset);
+
+/* Register a single field for @dev with a base offset of 0. */
+static inline int pciback_config_add_field(struct pci_dev *dev,
+					   const struct config_field *field)
+{
+	const unsigned int no_base = 0;
+
+	return pciback_config_add_field_offset(dev, field, no_base);
+}
+
+/*
+ * Register every element of the @field array (terminated by size == 0) for
+ * @dev with a base offset of 0. Stops at, and returns, the first error.
+ */
+static inline int pciback_config_add_fields(struct pci_dev *dev,
+					    const struct config_field *field)
+{
+	const struct config_field *cur;
+	int err = 0;
+
+	for (cur = field; cur->size != 0; cur++) {
+		err = pciback_config_add_field(dev, cur);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Register every element of the @field array (terminated by size == 0) for
+ * @dev, each relative to base @offset. Stops at, and returns, the first error.
+ */
+static inline int pciback_config_add_fields_offset(struct pci_dev *dev,
+					const struct config_field *field,
+					unsigned int offset)
+{
+	const struct config_field *cur;
+	int err = 0;
+
+	for (cur = field; cur->size != 0; cur++) {
+		err = pciback_config_add_field_offset(dev, cur, offset);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/* Read/Write the real configuration space */
+int pciback_read_config_byte(struct pci_dev *dev, int offset, u8 *value,
+ void *data);
+int pciback_read_config_word(struct pci_dev *dev, int offset, u16 *value,
+ void *data);
+int pciback_read_config_dword(struct pci_dev *dev, int offset, u32 *value,
+ void *data);
+int pciback_write_config_byte(struct pci_dev *dev, int offset, u8 value,
+ void *data);
+int pciback_write_config_word(struct pci_dev *dev, int offset, u16 value,
+ void *data);
+int pciback_write_config_dword(struct pci_dev *dev, int offset, u32 value,
+ void *data);
+
+int pciback_config_capability_init(void);
+
+int pciback_config_header_add_fields(struct pci_dev *dev);
+int pciback_config_capability_add_fields(struct pci_dev *dev);
+
+#endif /* __XEN_PCIBACK_CONF_SPACE_H__ */
diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c
new file mode 100644
index 000000000000..0ea84d6335f4
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability.c
@@ -0,0 +1,66 @@
+/*
+ * PCI Backend - Handles the virtual fields found on the capability lists
+ * in the configuration space.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include "pciback.h"
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+static LIST_HEAD(capabilities);
+
+static const struct config_field caplist_header[] = { /* added at the base of every capability that is overlaid */
+ {
+ .offset = PCI_CAP_LIST_ID,
+ .size = 2, /* encompass PCI_CAP_LIST_ID & PCI_CAP_LIST_NEXT */
+ .u.w.read = pciback_read_config_word,
+ .u.w.write = NULL, /* no handler: writes to this field are intercepted and dropped */
+ },
+ {} /* terminator (size == 0) */
+};
+
+/* Append @cap to the list walked by pciback_config_capability_add_fields(). */
+static inline void register_capability(struct pciback_config_capability *cap)
+{
+	struct list_head *node = &cap->cap_list;
+
+	list_add_tail(node, &capabilities);
+}
+
+/*
+ * For each registered capability that @dev actually has, add the generic
+ * capability-list header fields and then the capability's own fields, both
+ * based at the capability's position in configuration space.
+ * Returns 0 on success or the first error from adding fields.
+ */
+int pciback_config_capability_add_fields(struct pci_dev *dev)
+{
+	struct pciback_config_capability *cap;
+
+	list_for_each_entry(cap, &capabilities, cap_list) {
+		int err;
+		int pos = pci_find_capability(dev, cap->capability);
+
+		if (!pos)
+			continue;
+
+		dev_dbg(&dev->dev, "Found capability 0x%x at 0x%x\n",
+			cap->capability, pos);
+
+		err = pciback_config_add_fields_offset(dev, caplist_header,
+						       pos);
+		if (!err)
+			err = pciback_config_add_fields_offset(dev,
+							       cap->fields,
+							       pos);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Register the built-in capability overlays (VPD, then PM). Always returns 0. */
+int pciback_config_capability_init(void)
+{
+	register_capability(&pciback_config_capability_vpd);
+	register_capability(&pciback_config_capability_pm);
+
+	return 0;
+}
diff --git a/drivers/xen/xen-pciback/conf_space_capability.h b/drivers/xen/xen-pciback/conf_space_capability.h
new file mode 100644
index 000000000000..8da3ac415f29
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability.h
@@ -0,0 +1,26 @@
+/*
+ * PCI Backend - Data structures for special overlays for structures on
+ * the capability list.
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#ifndef __PCIBACK_CONFIG_CAPABILITY_H__
+#define __PCIBACK_CONFIG_CAPABILITY_H__
+
+#include <linux/pci.h>
+#include <linux/list.h>
+
+struct pciback_config_capability { /* overlay description for one PCI capability */
+ struct list_head cap_list; /* link in the list of registered capabilities */
+
+ int capability; /* capability ID passed to pci_find_capability() */
+
+ /* If the device has the capability found above, add these fields */
+ const struct config_field *fields; /* array terminated by an element with size == 0 */
+};
+
+extern struct pciback_config_capability pciback_config_capability_vpd;
+extern struct pciback_config_capability pciback_config_capability_pm;
+
+#endif
diff --git a/drivers/xen/xen-pciback/conf_space_capability_msi.c b/drivers/xen/xen-pciback/conf_space_capability_msi.c
new file mode 100644
index 000000000000..78f74b1852d4
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability_msi.c
@@ -0,0 +1,94 @@
+/*
+ * PCI Backend -- Configuration overlay for MSI capability
+ */
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+#include <xen/interface/io/pciif.h>
+#include <xen/events.h>
+#include "pciback.h"
+
+/*
+ * Enable MSI on @dev on behalf of the frontend behind @pdev.
+ *
+ * On success op->value carries the interrupt for the guest and 0 is
+ * returned; on failure op->value is 0 and XEN_PCI_ERR_op_failed is returned.
+ */
+int pciback_enable_msi(struct pciback_device *pdev,
+		       struct pci_dev *dev, struct xen_pci_op *op)
+{
+	int otherend = pdev->xdev->otherend_id;
+	int status = pci_enable_msi(dev);
+
+	if (status) {
+		printk(KERN_ERR "error enable msi for guest %x status %x\n",
+		       otherend, status);
+		op->value = 0;
+		return XEN_PCI_ERR_op_failed;
+	}
+
+	/* The guest is not handed the local domain's IRQ number: dev->irq
+	 * is translated with xen_pirq_from_irq() first (0 if no IRQ). */
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	return 0;
+}
+
+/*
+ * Disable MSI on @dev and report the interrupt the device is left with in
+ * op->value (translated with xen_pirq_from_irq(), or 0 if dev->irq is 0).
+ * Always returns 0.
+ */
+int pciback_disable_msi(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	pci_disable_msi(dev);
+
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	return 0;
+}
+
+/*
+ * Enable MSI-X on @dev for the op->value entries listed in
+ * op->msix_entries.
+ *
+ * The requested entries are copied into a temporary msix_entry array and
+ * passed to pci_enable_msix(). On success each non-zero vector is translated
+ * with xen_pirq_from_irq() and written back into op->msix_entries.
+ *
+ * Returns -EINVAL if more than SH_INFO_MAX_VEC entries are requested,
+ * -ENOMEM if the temporary array cannot be allocated, otherwise the result
+ * of pci_enable_msix(), which is also stored in op->value.
+ */
+int pciback_enable_msix(struct pciback_device *pdev,
+			struct pci_dev *dev, struct xen_pci_op *op)
+{
+	int i, result;
+	struct msix_entry *entries;
+
+	if (op->value > SH_INFO_MAX_VEC)
+		return -EINVAL;
+
+	/* kcalloc() instead of open-coded n * size: the count comes from
+	 * the frontend, so let the allocator check the multiplication. */
+	entries = kcalloc(op->value, sizeof(*entries), GFP_KERNEL);
+	if (entries == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < op->value; i++) {
+		entries[i].entry = op->msix_entries[i].entry;
+		entries[i].vector = op->msix_entries[i].vector;
+	}
+
+	result = pci_enable_msix(dev, entries, op->value);
+
+	if (result == 0) {
+		for (i = 0; i < op->value; i++) {
+			op->msix_entries[i].entry = entries[i].entry;
+			if (entries[i].vector)
+				op->msix_entries[i].vector =
+					xen_pirq_from_irq(entries[i].vector);
+		}
+	} else {
+		printk(KERN_WARNING "pciback: %s: failed to enable MSI-X: err %d!\n",
+			pci_name(dev), result);
+	}
+	kfree(entries);
+
+	op->value = result;
+
+	return result;
+}
+
+/*
+ * Disable MSI-X on @dev and report the interrupt the device is left with in
+ * op->value. Always returns 0.
+ */
+int pciback_disable_msix(struct pciback_device *pdev,
+			 struct pci_dev *dev, struct xen_pci_op *op)
+{
+	pci_disable_msix(dev);
+
+	/*
+	 * SR-IOV devices (which don't have any legacy IRQ) have
+	 * an undefined IRQ value of zero.
+	 */
+	if (dev->irq)
+		op->value = xen_pirq_from_irq(dev->irq);
+	else
+		op->value = 0;
+
+	return 0;
+}
+
diff --git a/drivers/xen/xen-pciback/conf_space_capability_pm.c b/drivers/xen/xen-pciback/conf_space_capability_pm.c
new file mode 100644
index 000000000000..04426165a9e5
--- /dev/null
+++ b/drivers/xen/xen-pciback/conf_space_capability_pm.c
@@ -0,0 +1,113 @@
+/*
+ * PCI Backend - Configuration space overlay for power management
+ *
+ * Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+
+#include <linux/pci.h>
+#include "conf_space.h"
+#include "conf_space_capability.h"
+
+/*
+ * Read handler for the PM capabilities word: reports the real register with
+ * the PCI_PM_CAP_PME_MASK bits cleared. *value is left untouched if the
+ * hardware read fails; the read's status is returned.
+ */
+static int pm_caps_read(struct pci_dev *dev, int offset, u16 *value,
+			void *data)
+{
+	u16 real_value;
+	int err = pci_read_config_word(dev, offset, &real_value);
+
+	if (!err)
+		*value = real_value & ~PCI_PM_CAP_PME_MASK;
+
+	return err;
+}
+
+/* PM_OK_BITS specifies the bits that the driver domain is allowed to change.
+ * Can't allow driver domain to enable PMEs - they're shared */
+#define PM_OK_BITS (PCI_PM_CTRL_PME_STATUS|PCI_PM_CTRL_DATA_SEL_MASK)
+
+/*
+ * Write handler for the PM control/status word.
+ *
+ * Only the PM_OK_BITS of @new_value are written to the register (and only if
+ * they differ from the current ones); all other bits keep their hardware
+ * value. The requested power state is not written directly but applied
+ * through pci_set_power_state().
+ *
+ * Returns 0 on success, the status of a failed config access, or
+ * PCIBIOS_SET_FAILED if the power state change is refused.
+ */
+static int pm_ctrl_write(struct pci_dev *dev, int offset, u16 new_value,
+			 void *data)
+{
+	int err;
+	u16 old_value;
+	pci_power_t new_state;
+
+	err = pci_read_config_word(dev, offset, &old_value);
+	if (err)
+		goto out;
+
+	/* grab the requested state before new_value is masked below */
+	new_state = (pci_power_t)(new_value & PCI_PM_CTRL_STATE_MASK);
+
+	new_value &= PM_OK_BITS;
+	if ((old_value & PM_OK_BITS) != new_value) {
+		new_value = (old_value & ~PM_OK_BITS) | new_value;
+		err = pci_write_config_word(dev, offset, new_value);
+		if (err)
+			goto out;
+	}
+
+	/* Let pci core handle the power management change */
+	dev_dbg(&dev->dev, "set power state to %x\n", new_state);
+	err = pci_set_power_state(dev, new_state);
+	if (err)
+		err = PCIBIOS_SET_FAILED;
+
+ out:
+	return err;
+}
+
+/* Ensure PMEs are disabled
+ *
+ * Field init callback for the PM control/status word: reads the register and,
+ * if PCI_PM_CTRL_PME_ENABLE is set, writes it back with that bit cleared.
+ * No private data is allocated; the return value is ERR_PTR() of the last
+ * config access status, i.e. NULL when that status is 0.
+ */
+static void *pm_ctrl_init(struct pci_dev *dev, int offset)
+{
+ int err;
+ u16 value;
+
+ err = pci_read_config_word(dev, offset, &value);
+ if (err)
+ goto out;
+
+ if (value & PCI_PM_CTRL_PME_ENABLE) {
+ value &= ~PCI_PM_CTRL_PME_ENABLE;
+ err = pci_write_config_word(dev, offset, value);
+ }
+
+out:
+ return ERR_PTR(err); /* NOTE(review): err is a config accessor status, presumably a positive PCIBIOS_* code - confirm the caller's IS_ERR() check can detect it */
+}
+
+static const struct config_field caplist_pm[] = {
+ {
+ .offset = PCI_PM_PMC,
+ .size = 2,
+ .u.w.read = pm_caps_read,
+ },
+ {
+ .offset = PCI_PM_CTRL,
+ .size = 2,
+ .init = pm_ctrl_init,
+ .u.w.read = pcibac