summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci18
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--arch/arm/mach-omap2/devices.c7
-rw-r--r--arch/arm/plat-omap/include/plat/iommu.h31
-rw-r--r--arch/arm/plat-omap/include/plat/iovmm.h12
-rw-r--r--arch/ia64/include/asm/iommu.h2
-rw-r--r--arch/ia64/kernel/pci-dma.c1
-rw-r--r--arch/x86/include/asm/iommu.h1
-rw-r--r--arch/x86/kernel/pci-dma.c11
-rw-r--r--drivers/acpi/pci_root.c7
-rw-r--r--drivers/iommu/Kconfig13
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd_iommu.c883
-rw-r--r--drivers/iommu/amd_iommu_init.c133
-rw-r--r--drivers/iommu/amd_iommu_proto.h24
-rw-r--r--drivers/iommu/amd_iommu_types.h118
-rw-r--r--drivers/iommu/amd_iommu_v2.c994
-rw-r--r--drivers/iommu/intel-iommu.c79
-rw-r--r--drivers/iommu/iommu.c177
-rw-r--r--drivers/iommu/msm_iommu.c25
-rw-r--r--drivers/iommu/omap-iommu.c80
-rw-r--r--drivers/iommu/omap-iovmm.c48
-rw-r--r--drivers/media/video/omap3isp/isp.c30
-rw-r--r--drivers/media/video/omap3isp/isp.h2
-rw-r--r--drivers/media/video/omap3isp/ispccdc.c18
-rw-r--r--drivers/media/video/omap3isp/ispstat.c8
-rw-r--r--drivers/media/video/omap3isp/ispvideo.c4
-rw-r--r--drivers/pci/ats.c90
-rw-r--r--drivers/pci/hotplug/pciehp.h1
-rw-r--r--drivers/pci/hotplug/pciehp_core.c11
-rw-r--r--drivers/pci/hotplug/pciehp_ctrl.c4
-rw-r--r--drivers/pci/hotplug/pciehp_hpc.c1
-rw-r--r--drivers/pci/msi.c121
-rw-r--r--drivers/pci/pci-acpi.c13
-rw-r--r--drivers/pci/pcie/aspm.c58
-rw-r--r--include/linux/acpi.h4
-rw-r--r--include/linux/amd-iommu.h138
-rw-r--r--include/linux/iommu.h33
-rw-r--r--include/linux/msi.h3
-rw-r--r--include/linux/pci-aspm.h4
-rw-r--r--include/linux/pci.h1
-rw-r--r--include/linux/pci_regs.h34
-rw-r--r--virt/kvm/iommu.c8
43 files changed, 2943 insertions, 317 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
index 349ecf26ce10..34f51100f029 100644
--- a/Documentation/ABI/testing/sysfs-bus-pci
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -66,6 +66,24 @@ Description:
re-discover previously removed devices.
Depends on CONFIG_HOTPLUG.
+What: /sys/bus/pci/devices/.../msi_irqs/
+Date: September, 2011
+Contact: Neil Horman <nhorman@tuxdriver.com>
+Description:
+ The /sys/devices/.../msi_irqs directory contains a variable set
+ of sub-directories, with each sub-directory being named after a
+ corresponding msi irq vector allocated to that device. Each
+ numbered sub-directory N contains attributes of that irq.
+ Note that this directory is not created for device drivers which
+ do not support msi irqs
+
+What: /sys/bus/pci/devices/.../msi_irqs/<N>/mode
+Date: September 2011
+Contact: Neil Horman <nhorman@tuxdriver.com>
+Description:
+ This attribute indicates the mode that the irq vector named by
+ the parent directory is in (msi vs. msix)
+
What: /sys/bus/pci/devices/.../remove
Date: January 2009
Contact: Linux PCI developers <linux-pci@vger.kernel.org>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e69a461a06c2..9373d95319c1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -329,6 +329,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
is a lot of faster
off - do not initialize any AMD IOMMU found in
the system
+ force_isolation - Force device isolation for all
+ devices. The IOMMU driver is not
+ allowed anymore to lift isolation
+ requirements as needed. This option
+ does not override iommu=pt
amijoy.map= [HW,JOY] Amiga joystick support
Map of devices attached to JOY0DAT and JOY1DAT
@@ -1059,7 +1064,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
nomerge
forcesac
soft
- pt [x86, IA-64]
+ pt [x86, IA-64]
+ group_mf [x86, IA-64]
+
io7= [HW] IO7 for Marvel based alpha systems
See comment before marvel_specify_io7 in
diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c
index 35d5dffab7e1..46dfd1ae8f71 100644
--- a/arch/arm/mach-omap2/devices.c
+++ b/arch/arm/mach-omap2/devices.c
@@ -28,6 +28,7 @@
#include <plat/board.h>
#include <plat/mcbsp.h>
#include <plat/mmc.h>
+#include <plat/iommu.h>
#include <plat/dma.h>
#include <plat/omap_hwmod.h>
#include <plat/omap_device.h>
@@ -211,9 +212,15 @@ static struct platform_device omap3isp_device = {
.resource = omap3isp_resources,
};
+static struct omap_iommu_arch_data omap3_isp_iommu = {
+ .name = "isp",
+};
+
int omap3_init_camera(struct isp_platform_data *pdata)
{
omap3isp_device.dev.platform_data = pdata;
+ omap3isp_device.dev.archdata.iommu = &omap3_isp_iommu;
+
return platform_device_register(&omap3isp_device);
}
diff --git a/arch/arm/plat-omap/include/plat/iommu.h b/arch/arm/plat-omap/include/plat/iommu.h
index a1d79ee19250..88be3e628b33 100644
--- a/arch/arm/plat-omap/include/plat/iommu.h
+++ b/arch/arm/plat-omap/include/plat/iommu.h
@@ -111,6 +111,32 @@ struct iommu_platform_data {
u32 da_end;
};
+/**
+ * struct iommu_arch_data - omap iommu private data
+ * @name: name of the iommu device
+ * @iommu_dev: handle of the iommu device
+ *
+ * This is an omap iommu private data object, which binds an iommu user
+ * to its iommu device. This object should be placed at the iommu user's
+ * dev_archdata so generic IOMMU API can be used without having to
+ * utilize omap-specific plumbing anymore.
+ */
+struct omap_iommu_arch_data {
+ const char *name;
+ struct omap_iommu *iommu_dev;
+};
+
+/**
+ * dev_to_omap_iommu() - retrieves an omap iommu object from a user device
+ * @dev: iommu client device
+ */
+static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
+{
+ struct omap_iommu_arch_data *arch_data = dev->archdata.iommu;
+
+ return arch_data->iommu_dev;
+}
+
/* IOMMU errors */
#define OMAP_IOMMU_ERR_TLB_MISS (1 << 0)
#define OMAP_IOMMU_ERR_TRANS_FAULT (1 << 1)
@@ -163,8 +189,8 @@ extern int omap_iommu_set_isr(const char *name,
void *priv),
void *isr_priv);
-extern void omap_iommu_save_ctx(struct omap_iommu *obj);
-extern void omap_iommu_restore_ctx(struct omap_iommu *obj);
+extern void omap_iommu_save_ctx(struct device *dev);
+extern void omap_iommu_restore_ctx(struct device *dev);
extern int omap_install_iommu_arch(const struct iommu_functions *ops);
extern void omap_uninstall_iommu_arch(const struct iommu_functions *ops);
@@ -176,6 +202,5 @@ extern ssize_t
omap_iommu_dump_ctx(struct omap_iommu *obj, char *buf, ssize_t len);
extern size_t
omap_dump_tlb_entries(struct omap_iommu *obj, char *buf, ssize_t len);
-struct device *omap_find_iommu_device(const char *name);
#endif /* __MACH_IOMMU_H */
diff --git a/arch/arm/plat-omap/include/plat/iovmm.h b/arch/arm/plat-omap/include/plat/iovmm.h
index 6af1a91c0f36..498e57cda6cd 100644
--- a/arch/arm/plat-omap/include/plat/iovmm.h
+++ b/arch/arm/plat-omap/include/plat/iovmm.h
@@ -72,18 +72,18 @@ struct iovm_struct {
#define IOVMF_DA_FIXED (1 << (4 + IOVMF_SW_SHIFT))
-extern struct iovm_struct *omap_find_iovm_area(struct omap_iommu *obj, u32 da);
+extern struct iovm_struct *omap_find_iovm_area(struct device *dev, u32 da);
extern u32
-omap_iommu_vmap(struct iommu_domain *domain, struct omap_iommu *obj, u32 da,
+omap_iommu_vmap(struct iommu_domain *domain, struct device *dev, u32 da,
const struct sg_table *sgt, u32 flags);
extern struct sg_table *omap_iommu_vunmap(struct iommu_domain *domain,
- struct omap_iommu *obj, u32 da);
+ struct device *dev, u32 da);
extern u32
-omap_iommu_vmalloc(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vmalloc(struct iommu_domain *domain, struct device *dev,
u32 da, size_t bytes, u32 flags);
extern void
-omap_iommu_vfree(struct iommu_domain *domain, struct omap_iommu *obj,
+omap_iommu_vfree(struct iommu_domain *domain, struct device *dev,
const u32 da);
-extern void *omap_da_to_va(struct omap_iommu *obj, u32 da);
+extern void *omap_da_to_va(struct device *dev, u32 da);
#endif /* __IOMMU_MMAP_H */
diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h
index 105c93b00b1b..b6a809fa2995 100644
--- a/arch/ia64/include/asm/iommu.h
+++ b/arch/ia64/include/asm/iommu.h
@@ -11,10 +11,12 @@ extern void no_iommu_init(void);
extern int force_iommu, no_iommu;
extern int iommu_pass_through;
extern int iommu_detected;
+extern int iommu_group_mf;
#else
#define iommu_pass_through (0)
#define no_iommu (1)
#define iommu_detected (0)
+#define iommu_group_mf (0)
#endif
extern void iommu_dma_init(void);
extern void machvec_init(const char *name);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index c16162c70860..eb1175720050 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -33,6 +33,7 @@ int force_iommu __read_mostly;
#endif
int iommu_pass_through;
+int iommu_group_mf;
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index 345c99cef152..dffc38ee6255 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -5,6 +5,7 @@ extern struct dma_map_ops nommu_dma_ops;
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern int iommu_pass_through;
+extern int iommu_group_mf;
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 80dc793b3f63..1c4d769e21ea 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -45,6 +45,15 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
+/*
+ * Group multi-function PCI devices into a single device-group for the
+ * iommu_device_group interface. This tells the iommu driver to pretend
+ * it cannot distinguish between functions of a device, exposing only one
+ * group for the device. Useful for disallowing use of individual PCI
+ * functions from userspace drivers.
+ */
+int iommu_group_mf __read_mostly;
+
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
/* Dummy device used for NULL arguments (normally ISA). */
@@ -169,6 +178,8 @@ static __init int iommu_setup(char *p)
#endif
if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
+ if (!strncmp(p, "group_mf", 8))
+ iommu_group_mf = 1;
gart_parse_options(p);
diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c
index 2672c798272f..7aff6312ce7c 100644
--- a/drivers/acpi/pci_root.c
+++ b/drivers/acpi/pci_root.c
@@ -596,6 +596,13 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device)
if (ACPI_SUCCESS(status)) {
dev_info(root->bus->bridge,
"ACPI _OSC control (0x%02x) granted\n", flags);
+ if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_ASPM) {
+ /*
+ * We have ASPM control, but the FADT indicates
+ * that it's unsupported. Clear it.
+ */
+ pcie_clear_aspm(root->bus);
+ }
} else {
dev_info(root->bus->bridge,
"ACPI _OSC request failed (%s), "
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 5414253b185a..6bea6962f8ee 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -34,7 +34,9 @@ config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
select PCI_MSI
- select PCI_IOV
+ select PCI_ATS
+ select PCI_PRI
+ select PCI_PASID
select IOMMU_API
depends on X86_64 && PCI && ACPI
---help---
@@ -58,6 +60,15 @@ config AMD_IOMMU_STATS
information to userspace via debugfs.
If unsure, say N.
+config AMD_IOMMU_V2
+ tristate "AMD IOMMU Version 2 driver (EXPERIMENTAL)"
+ depends on AMD_IOMMU && PROFILING && EXPERIMENTAL
+ select MMU_NOTIFIER
+ ---help---
+ This option enables support for the AMD IOMMUv2 features of the IOMMU
+ hardware. Select this option if you want to use devices that support
+ the the PCI PRI and PASID interface.
+
# Intel IOMMU support
config DMAR_TABLE
bool
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 2f4448794bc7..0e36b4934aff 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_IOMMU_API) += iommu.o
obj-$(CONFIG_MSM_IOMMU) += msm_iommu.o msm_iommu_dev.o
obj-$(CONFIG_AMD_IOMMU) += amd_iommu.o amd_iommu_init.o
+obj-$(CONFIG_AMD_IOMMU_V2) += amd_iommu_v2.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += iova.o intel-iommu.o
obj-$(CONFIG_IRQ_REMAP) += intr_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ee277a8521a..cce1f03b8895 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -17,6 +17,7 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#include <linux/ratelimit.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/bitmap.h>
@@ -28,6 +29,8 @@
#include <linux/iommu.h>
#include <linux/delay.h>
#include <linux/amd-iommu.h>
+#include <linux/notifier.h>
+#include <linux/export.h>
#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
@@ -41,6 +44,24 @@
#define LOOP_TIMEOUT 100000
+/*
+ * This bitmap is used to advertise the page sizes our hardware support
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * Traditionally the IOMMU core just handed us the mappings directly,
+ * after making sure the size is an order of a 4KiB page and that the
+ * mapping has natural alignment.
+ *
+ * To retain this behavior, we currently advertise that we support
+ * all page sizes that are an order of 4KiB.
+ *
+ * If at some point we'd like to utilize the IOMMU core's new behavior,
+ * we could change this to advertise the real page sizes we support.
+ */
+#define AMD_IOMMU_PGSIZES (~0xFFFUL)
+
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
/* A list of preallocated protection domains */
@@ -59,6 +80,9 @@ static struct protection_domain *pt_domain;
static struct iommu_ops amd_iommu_ops;
+static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
+int amd_iommu_max_glx_val = -1;
+
/*
* general struct to manage commands send to an IOMMU
*/
@@ -67,6 +91,7 @@ struct iommu_cmd {
};
static void update_domain(struct protection_domain *domain);
+static int __init alloc_passthrough_domain(void);
/****************************************************************************
*
@@ -147,6 +172,33 @@ static struct iommu_dev_data *get_dev_data(struct device *dev)
return dev->archdata.iommu;
}
+static bool pci_iommuv2_capable(struct pci_dev *pdev)
+{
+ static const int caps[] = {
+ PCI_EXT_CAP_ID_ATS,
+ PCI_EXT_CAP_ID_PRI,
+ PCI_EXT_CAP_ID_PASID,
+ };
+ int i, pos;
+
+ for (i = 0; i < 3; ++i) {
+ pos = pci_find_ext_capability(pdev, caps[i]);
+ if (pos == 0)
+ return false;
+ }
+
+ return true;
+}
+
+static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
+{
+ struct iommu_dev_data *dev_data;
+
+ dev_data = get_dev_data(&pdev->dev);
+
+ return dev_data->errata & (1 << erratum) ? true : false;
+}
+
/*
* In this function the list of preallocated protection domains is traversed to
* find the domain for a specific device
@@ -204,6 +256,7 @@ static bool check_device(struct device *dev)
static int iommu_init_device(struct device *dev)
{
+ struct pci_dev *pdev = to_pci_dev(dev);
struct iommu_dev_data *dev_data;
u16 alias;
@@ -228,6 +281,13 @@ static int iommu_init_device(struct device *dev)
dev_data->alias_data = alias_data;
}
+ if (pci_iommuv2_capable(pdev)) {
+ struct amd_iommu *iommu;
+
+ iommu = amd_iommu_rlookup_table[dev_data->devid];
+ dev_data->iommu_v2 = iommu->is_iommu_v2;
+ }
+
dev->archdata.iommu = dev_data;
return 0;
@@ -317,6 +377,11 @@ DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
+DECLARE_STATS_COUNTER(complete_ppr);
+DECLARE_STATS_COUNTER(invalidate_iotlb);
+DECLARE_STATS_COUNTER(invalidate_iotlb_all);
+DECLARE_STATS_COUNTER(pri_requests);
+
static struct dentry *stats_dir;
static struct dentry *de_fflush;
@@ -351,6 +416,10 @@ static void amd_iommu_stats_init(void)
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
+ amd_iommu_stats_add(&complete_ppr);
+ amd_iommu_stats_add(&invalidate_iotlb);
+ amd_iommu_stats_add(&invalidate_iotlb_all);
+ amd_iommu_stats_add(&pri_requests);
}
#endif
@@ -365,8 +434,8 @@ static void dump_dte_entry(u16 devid)
{
int i;
- for (i = 0; i < 8; ++i)
- pr_err("AMD-Vi: DTE[%d]: %08x\n", i,
+ for (i = 0; i < 4; ++i)
+ pr_err("AMD-Vi: DTE[%d]: %016llx\n", i,
amd_iommu_dev_table[devid].data[i]);
}
@@ -461,12 +530,84 @@ static void iommu_poll_events(struct amd_iommu *iommu)
spin_unlock_irqrestore(&iommu->lock, flags);
}
+static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head)
+{
+ struct amd_iommu_fault fault;
+ volatile u64 *raw;
+ int i;
+
+ INC_STATS_COUNTER(pri_requests);
+
+ raw = (u64 *)(iommu->ppr_log + head);
+
+ /*
+ * Hardware bug: Interrupt may arrive before the entry is written to
+ * memory. If this happens we need to wait for the entry to arrive.
+ */
+ for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ if (PPR_REQ_TYPE(raw[0]) != 0)
+ break;
+ udelay(1);
+ }
+
+ if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
+ pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n");
+ return;
+ }
+
+ fault.address = raw[1];
+ fault.pasid = PPR_PASID(raw[0]);
+ fault.device_id = PPR_DEVID(raw[0]);
+ fault.tag = PPR_TAG(raw[0]);
+ fault.flags = PPR_FLAGS(raw[0]);
+
+ /*
+ * To detect the hardware bug we need to clear the entry
+ * to back to zero.
+ */
+ raw[0] = raw[1] = 0;
+
+ atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
+}
+
+static void iommu_poll_ppr_log(struct amd_iommu *iommu)
+{
+ unsigned long flags;
+ u32 head, tail;
+
+ if (iommu->ppr_log == NULL)
+ return;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+
+ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+
+ while (head != tail) {
+
+ /* Handle PPR entry */
+ iommu_handle_ppr_entry(iommu, head);
+
+ /* Update and refresh ring-buffer state*/
+ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
+ writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+ tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+ }
+
+ /* enable ppr interrupts again */
+ writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
irqreturn_t amd_iommu_int_thread(int irq, void *data)
{
struct amd_iommu *iommu;
- for_each_iommu(iommu)
+ for_each_iommu(iommu) {
iommu_poll_events(iommu);
+ iommu_poll_ppr_log(iommu);
+ }
return IRQ_HANDLED;
}
@@ -595,6 +736,60 @@ static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
+static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid,
+ u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = pasid & PASID_MASK;
+ cmd->data[1] = domid;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[3] = upper_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+}
+
+static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int qdep, u64 address, bool size)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ address &= ~(0xfffULL);
+
+ cmd->data[0] = devid;
+ cmd->data[0] |= (pasid & 0xff) << 16;
+ cmd->data[0] |= (qdep & 0xff) << 24;
+ cmd->data[1] = devid;
+ cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16;
+ cmd->data[2] = lower_32_bits(address);
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK;
+ cmd->data[3] = upper_32_bits(address);
+ if (size)
+ cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+ CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
+}
+
+static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid,
+ int status, int tag, bool gn)
+{
+ memset(cmd, 0, sizeof(*cmd));
+
+ cmd->data[0] = devid;
+ if (gn) {
+ cmd->data[1] = pasid & PASID_MASK;
+ cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK;
+ }
+ cmd->data[3] = tag & 0x1ff;
+ cmd->data[3] |= (status & PPR_STATUS_MASK) << PPR_STATUS_SHIFT;
+
+ CMD_SET_TYPE(cmd, CMD_COMPLETE_PPR);
+}
+
static void build_inv_all(struct iommu_cmd *cmd)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1496,6 +1691,48 @@ static void free_pagetable(struct protection_domain *domain)
domain->pt_root = NULL;
}
+static void free_gcr3_tbl_level1(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_page((unsigned long)ptr);
+ }
+}
+
+static void free_gcr3_tbl_level2(u64 *tbl)
+{
+ u64 *ptr;
+ int i;
+
+ for (i = 0; i < 512; ++i) {
+ if (!(tbl[i] & GCR3_VALID))
+ continue;
+
+ ptr = __va(tbl[i] & PAGE_MASK);
+
+ free_gcr3_tbl_level1(ptr);
+ }
+}
+
+static void free_gcr3_table(struct protection_domain *domain)
+{
+ if (domain->glx == 2)
+ free_gcr3_tbl_level2(domain->gcr3_tbl);
+ else if (domain->glx == 1)
+ free_gcr3_tbl_level1(domain->gcr3_tbl);
+ else if (domain->glx != 0)
+ BUG();
+
+ free_page((unsigned long)domain->gcr3_tbl);
+}
+
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
@@ -1582,20 +1819,52 @@ static bool dma_ops_domain(struct protection_domain *domain)
static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
{
- u64 pte_root = virt_to_phys(domain->pt_root);
- u32 flags = 0;
+ u64 pte_root = 0;
+ u64 flags = 0;
+
+ if (domain->mode != PAGE_MODE_NONE)
+ pte_root = virt_to_phys(domain->pt_root);
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+ flags = amd_iommu_dev_table[devid].data[1];
+
if (ats)
flags |= DTE_FLAG_IOTLB;
- amd_iommu_dev_table[devid].data[3] |= flags;
- amd_iommu_dev_table[devid].data[2] = domain->id;
- amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root);
- amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root);
+ if (domain->flags & PD_IOMMUV2_MASK) {
+ u64 gcr3 = __pa(domain->gcr3_tbl);
+ u64 glx = domain->glx;
+ u64 tmp;
+
+ pte_root |= DTE_FLAG_GV;
+ pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+
+ /* First mask out possible old values for GCR3 table */
+ tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+ flags &= ~tmp;
+
+ tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ flags &= ~tmp;
+
+ /* Encode GCR3 table into DTE */
+ tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+ pte_root |= tmp;
+
+ tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+ flags |= tmp;
+
+ tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+ flags |= tmp;
+ }
+
+ flags &= ~(0xffffUL);
+ flags |= domain->id;
+
+ amd_iommu_dev_table[devid].data[1] = flags;
+ amd_iommu_dev_table[devid].data[0] = pte_root;
}
static void clear_dte_entry(u16 devid)
@@ -1603,7 +1872,6 @@ static void clear_dte_entry(u16 devid)
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
- amd_iommu_dev_table[devid].data[2] = 0;
amd_iommu_apply_erratum_63(devid);
}
@@ -1696,6 +1964,93 @@ out_unlock:
return ret;
}
+
+static void pdev_iommuv2_disable(struct pci_dev *pdev)
+{
+ pci_disable_ats(pdev);
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+}
+
+/* FIXME: Change generic reset-function to do the same */
+static int pri_reset_while_enabled(struct pci_dev *pdev)
+{
+ u16 control;
+ int pos;
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
+ if (!pos)
+ return -EINVAL;
+
+ pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
+ control |= PCI_PRI_CTRL_RESET;
+ pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
+
+ return 0;
+}
+
+static int pdev_iommuv2_enable(struct pci_dev *pdev)
+{
+ bool reset_enable;
+ int reqs, ret;
+
+ /* FIXME: Hardcode number of outstanding requests for now */
+ reqs = 32;
+ if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
+ reqs = 1;
+ reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
+
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (ret)
+ goto out_err;
+
+ /* First reset the PRI state of the device */
+ ret = pci_reset_pri(pdev);
+ if (ret)
+ goto out_err;
+
+ /* Enable PRI */
+ ret = pci_enable_pri(pdev, reqs);
+ if (ret)
+ goto out_err;
+
+ if (reset_enable) {
+ ret = pri_reset_while_enabled(pdev);
+ if (ret)
+ goto out_err;
+ }
+
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (ret)
+ goto out_err;
+
+ return 0;
+
+out_err:
+ pci_disable_pri(pdev);
+ pci_disable_pasid(pdev);
+
+ return ret;
+}
+
+/* FIX