From 4781bc427b0d23bfde4675d210fd35debee2c9de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= <jglisse@redhat.com>
Date: Mon, 31 Aug 2015 18:13:03 -0400
Subject: iommu/amd: Return positive value in amd_iommu_detect()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix amd_iommu_detect() to return a positive value on success, as
intended, and not zero. This will not change anything in the end
as AMD IOMMU disables swiotlb and properly associates itself with
devices even if detect() doesn't return a positive value.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Joerg Roedel <jroedel@suse.de>
Cc: iommu@lists.linux-foundation.org
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5ef347a13cb5..ea2afefe8e57 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2198,7 +2198,7 @@ int __init amd_iommu_detect(void)
 	iommu_detected = 1;
 	x86_init.iommu.iommu_init = amd_iommu_init;
 
-	return 0;
+	return 1;
 }
 
 /****************************************************************************
-- 
cgit v1.2.3


From a591989a7c162587f24305c3fe3bd8f055ed3329 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@lip6.fr>
Date: Sun, 13 Sep 2015 14:15:31 +0200
Subject: iommu/amd: Drop null test before destroy functions

Remove unneeded NULL test.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@ expression x; @@
-if (x != NULL) {
  \(kmem_cache_destroy\|mempool_destroy\|dma_pool_destroy\)(x);
  x = NULL;
-}
// </smpl>

Signed-off-by: Julia Lawall <Julia.Lawall@lip6.fr>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd_iommu_init.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index ea2afefe8e57..3ba1ee709824 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1755,11 +1755,8 @@ static void __init free_on_init_error(void)
 	free_pages((unsigned long)irq_lookup_table,
 		   get_order(rlookup_table_size));
 
-	if (amd_iommu_irq_cache) {
-		kmem_cache_destroy(amd_iommu_irq_cache);
-		amd_iommu_irq_cache = NULL;
-
-	}
+	kmem_cache_destroy(amd_iommu_irq_cache);
+	amd_iommu_irq_cache = NULL;
 
 	free_pages((unsigned long)amd_iommu_rlookup_table,
 		   get_order(rlookup_table_size));
-- 
cgit v1.2.3


From d66ce54b4664a0d66429a4de996741581d71cf90 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 23 Sep 2015 19:00:10 +0200
Subject: iommu/vt-d: Split iommu_prepare_identity_map

Split the part of the function that fetches the domain out
and put the rest into domain_prepare_identity_map(), so
that the code can also be used when the domain is
already known.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 42 ++++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 20 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 2d7349a3ee14..24d31090b140 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -2429,17 +2429,11 @@ static int iommu_domain_identity_map(struct dmar_domain *domain,
 				  DMA_PTE_READ|DMA_PTE_WRITE);
 }
 
-static int iommu_prepare_identity_map(struct device *dev,
-				      unsigned long long start,
-				      unsigned long long end)
+static int domain_prepare_identity_map(struct device *dev,
+				       struct dmar_domain *domain,
+				       unsigned long long start,
+				       unsigned long long end)
 {
-	struct dmar_domain *domain;
-	int ret;
-
-	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
-	if (!domain)
-		return -ENOMEM;
-
 	/* For _hardware_ passthrough, don't bother. But for software
 	   passthrough, we do it anyway -- it may indicate a memory
 	   range which is reserved in E820, so which didn't get set
@@ -2459,8 +2453,7 @@ static int iommu_prepare_identity_map(struct device *dev,
 			dmi_get_system_info(DMI_BIOS_VENDOR),
 			dmi_get_system_info(DMI_BIOS_VERSION),
 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
-		ret = -EIO;
-		goto error;
+		return -EIO;
 	}
 
 	if (end >> agaw_to_width(domain->agaw)) {
@@ -2470,18 +2463,27 @@ static int iommu_prepare_identity_map(struct device *dev,
 		     dmi_get_system_info(DMI_BIOS_VENDOR),
 		     dmi_get_system_info(DMI_BIOS_VERSION),
 		     dmi_get_system_info(DMI_PRODUCT_VERSION));
-		ret = -EIO;
-		goto error;
+		return -EIO;
 	}
 
-	ret = iommu_domain_identity_map(domain, start, end);
-	if (ret)
-		goto error;
+	return iommu_domain_identity_map(domain, start, end);
+}
 
-	return 0;
+static int iommu_prepare_identity_map(struct device *dev,
+				      unsigned long long start,
+				      unsigned long long end)
+{
+	struct dmar_domain *domain;
+	int ret;
+
+	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
+	if (!domain)
+		return -ENOMEM;
+
+	ret = domain_prepare_identity_map(dev, domain, start, end);
+	if (ret)
+		domain_exit(domain);
 
- error:
-	domain_exit(domain);
 	return ret;
 }
 
-- 
cgit v1.2.3


From b1ce5b79aec8d8cd8bcd076d8cce8bc3cd690051 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <jroedel@suse.de>
Date: Wed, 23 Sep 2015 19:16:01 +0200
Subject: iommu/vt-d: Create RMRR mappings in newly allocated domains

Currently the RMRR entries are created only at boot time.
This means they will vanish when the domain allocated at
boot time is destroyed.
This patch makes sure that newly allocated domains will
also get RMRR mappings.

Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'drivers')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 24d31090b140..6ac6e741c46e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3241,7 +3241,10 @@ static struct iova *intel_alloc_iova(struct device *dev,
 
 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 {
+	struct dmar_rmrr_unit *rmrr;
 	struct dmar_domain *domain;
+	struct device *i_dev;
+	int i, ret;
 
 	domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
 	if (!domain) {
@@ -3250,6 +3253,23 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
 		return NULL;
 	}
 
+	/* We have a new domain - setup possible RMRRs for the device */
+	rcu_read_lock();
+	for_each_rmrr_units(rmrr) {
+		for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
+					  i, i_dev) {
+			if (i_dev != dev)
+				continue;
+
+			ret = domain_prepare_identity_map(dev, domain,
+							  rmrr->base_address,
+							  rmrr->end_address);
+			if (ret)
+				dev_err(dev, "Mapping reserved region failed\n");
+		}
+	}
+	rcu_read_unlock();
+
 	return domain;
 }
 
-- 
cgit v1.2.3


From 8128f23c436d0dd4f72412e1bf9256e424479dc3 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Thu, 27 Aug 2015 15:33:03 +0200
Subject: iommu/s390: Add iommu api for s390 pci devices

This adds an IOMMU API implementation for s390 PCI devices.

Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig      |   7 +
 drivers/iommu/Makefile     |   1 +
 drivers/iommu/s390-iommu.c | 337 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 345 insertions(+)
 create mode 100644 drivers/iommu/s390-iommu.c

(limited to 'drivers')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9da766719c8..5feb70a91a8c 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -369,4 +369,11 @@ config ARM_SMMU_V3
 	  Say Y here if your system includes an IOMMU device implementing
 	  the ARM SMMUv3 architecture.
 
+config S390_IOMMU
+	def_bool y if S390 && PCI
+	depends on S390 && PCI
+	select IOMMU_API
+	help
+	  Support for the IOMMU API for s390 PCI devices.
+
 endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6dcc513d711..a2056d35420b 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
 obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
 obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
+obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
new file mode 100644
index 000000000000..cbe198cb3699
--- /dev/null
+++ b/drivers/iommu/s390-iommu.c
@@ -0,0 +1,337 @@
+/*
+ * IOMMU API for s390 PCI devices
+ *
+ * Copyright IBM Corp. 2015
+ * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
+ */
+
+#include <linux/pci.h>
+#include <linux/iommu.h>
+#include <linux/iommu-helper.h>
+#include <linux/pci.h>
+#include <linux/sizes.h>
+#include <asm/pci_dma.h>
+
+/*
+ * Physically contiguous memory regions can be mapped with 4 KiB alignment,
+ * we allow all page sizes that are an order of 4KiB (no special large page
+ * support so far).
+ */
+#define S390_IOMMU_PGSIZES	(~0xFFFUL)
+
+struct s390_domain {
+	struct iommu_domain	domain;
+	struct list_head	devices;
+	unsigned long		*dma_table;
+	spinlock_t		dma_table_lock;
+	spinlock_t		list_lock;
+};
+
+struct s390_domain_device {
+	struct list_head	list;
+	struct zpci_dev		*zdev;
+};
+
+static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct s390_domain, domain);
+}
+
+static bool s390_iommu_capable(enum iommu_cap cap)
+{
+	switch (cap) {
+	case IOMMU_CAP_CACHE_COHERENCY:
+		return true;
+	case IOMMU_CAP_INTR_REMAP:
+		return true;
+	default:
+		return false;
+	}
+}
+
+struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+{
+	struct s390_domain *s390_domain;
+
+	if (domain_type != IOMMU_DOMAIN_UNMANAGED)
+		return NULL;
+
+	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
+	if (!s390_domain)
+		return NULL;
+
+	s390_domain->dma_table = dma_alloc_cpu_table();
+	if (!s390_domain->dma_table) {
+		kfree(s390_domain);
+		return NULL;
+	}
+
+	spin_lock_init(&s390_domain->dma_table_lock);
+	spin_lock_init(&s390_domain->list_lock);
+	INIT_LIST_HEAD(&s390_domain->devices);
+
+	return &s390_domain->domain;
+}
+
+void s390_domain_free(struct iommu_domain *domain)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+
+	dma_cleanup_tables(s390_domain->dma_table);
+	kfree(s390_domain);
+}
+
+static int s390_iommu_attach_device(struct iommu_domain *domain,
+				    struct device *dev)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+	struct s390_domain_device *domain_device;
+	unsigned long flags;
+	int rc;
+
+	if (!zdev)
+		return -ENODEV;
+
+	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
+	if (!domain_device)
+		return -ENOMEM;
+
+	if (zdev->dma_table)
+		zpci_dma_exit_device(zdev);
+
+	zdev->dma_table = s390_domain->dma_table;
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
+				zdev->start_dma + zdev->iommu_size - 1,
+				(u64) zdev->dma_table);
+	if (rc)
+		goto out_restore;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	/* First device defines the DMA range limits */
+	if (list_empty(&s390_domain->devices)) {
+		domain->geometry.aperture_start = zdev->start_dma;
+		domain->geometry.aperture_end = zdev->end_dma;
+		domain->geometry.force_aperture = true;
+	/* Allow only devices with identical DMA range limits */
+	} else if (domain->geometry.aperture_start != zdev->start_dma ||
+		   domain->geometry.aperture_end != zdev->end_dma) {
+		rc = -EINVAL;
+		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+		goto out_restore;
+	}
+	domain_device->zdev = zdev;
+	zdev->s390_domain = s390_domain;
+	list_add(&domain_device->list, &s390_domain->devices);
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+	return 0;
+
+out_restore:
+	zpci_dma_init_device(zdev);
+	kfree(domain_device);
+
+	return rc;
+}
+
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+				     struct device *dev)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+	struct s390_domain_device *domain_device, *tmp;
+	unsigned long flags;
+	int found = 0;
+
+	if (!zdev)
+		return;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
+				 list) {
+		if (domain_device->zdev == zdev) {
+			list_del(&domain_device->list);
+			kfree(domain_device);
+			found = 1;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+	if (found) {
+		zdev->s390_domain = NULL;
+		zpci_unregister_ioat(zdev, 0);
+		zpci_dma_init_device(zdev);
+	}
+}
+
+static int s390_iommu_add_device(struct device *dev)
+{
+	struct iommu_group *group;
+	int rc;
+
+	group = iommu_group_get(dev);
+	if (!group) {
+		group = iommu_group_alloc();
+		if (IS_ERR(group))
+			return PTR_ERR(group);
+	}
+
+	rc = iommu_group_add_device(group, dev);
+	iommu_group_put(group);
+
+	return rc;
+}
+
+static void s390_iommu_remove_device(struct device *dev)
+{
+	struct zpci_dev *zdev = to_pci_dev(dev)->sysdata;
+	struct iommu_domain *domain;
+
+	/*
+	 * This is a workaround for a scenario where the IOMMU API common code
+	 * "forgets" to call the detach_dev callback: After binding a device
+	 * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
+	 * the attach_dev), removing the device via
+	 * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
+	 * only remove_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
+	 * notifier.
+	 *
+	 * So let's call detach_dev from here if it hasn't been called before.
+	 */
+	if (zdev && zdev->s390_domain) {
+		domain = iommu_get_domain_for_dev(dev);
+		if (domain)
+			s390_iommu_detach_device(domain, dev);
+	}
+
+	iommu_group_remove_device(dev);
+}
+
+static int s390_iommu_update_trans(struct s390_domain *s390_domain,
+				   unsigned long pa, dma_addr_t dma_addr,
+				   size_t size, int flags)
+{
+	struct s390_domain_device *domain_device;
+	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
+	dma_addr_t start_dma_addr = dma_addr;
+	unsigned long irq_flags, nr_pages, i;
+	int rc = 0;
+
+	if (dma_addr < s390_domain->domain.geometry.aperture_start ||
+	    dma_addr + size > s390_domain->domain.geometry.aperture_end)
+		return -EINVAL;
+
+	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	if (!nr_pages)
+		return 0;
+
+	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+	for (i = 0; i < nr_pages; i++) {
+		dma_update_cpu_trans(s390_domain->dma_table, page_addr,
+				     dma_addr, flags);
+		page_addr += PAGE_SIZE;
+		dma_addr += PAGE_SIZE;
+	}
+
+	spin_lock(&s390_domain->list_lock);
+	list_for_each_entry(domain_device, &s390_domain->devices, list) {
+		rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+					start_dma_addr, nr_pages * PAGE_SIZE);
+		if (rc)
+			break;
+	}
+	spin_unlock(&s390_domain->list_lock);
+	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+	return rc;
+}
+
+static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
+			  phys_addr_t paddr, size_t size, int prot)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	int flags = ZPCI_PTE_VALID, rc = 0;
+
+	if (!(prot & IOMMU_READ))
+		return -EINVAL;
+
+	if (!(prot & IOMMU_WRITE))
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+				     size, flags);
+
+	return rc;
+}
+
+static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
+					   dma_addr_t iova)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	unsigned long *sto, *pto, *rto, flags;
+	unsigned int rtx, sx, px;
+	phys_addr_t phys = 0;
+
+	if (iova < domain->geometry.aperture_start ||
+	    iova > domain->geometry.aperture_end)
+		return 0;
+
+	rtx = calc_rtx(iova);
+	sx = calc_sx(iova);
+	px = calc_px(iova);
+	rto = s390_domain->dma_table;
+
+	spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
+	if (rto && reg_entry_isvalid(rto[rtx])) {
+		sto = get_rt_sto(rto[rtx]);
+		if (sto && reg_entry_isvalid(sto[sx])) {
+			pto = get_st_pto(sto[sx]);
+			if (pto && pt_entry_isvalid(pto[px]))
+				phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+		}
+	}
+	spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
+
+	return phys;
+}
+
+static size_t s390_iommu_unmap(struct iommu_domain *domain,
+			       unsigned long iova, size_t size)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	int flags = ZPCI_PTE_INVALID;
+	phys_addr_t paddr;
+	int rc;
+
+	paddr = s390_iommu_iova_to_phys(domain, iova);
+	if (!paddr)
+		return 0;
+
+	rc = s390_iommu_update_trans(s390_domain, (unsigned long) paddr, iova,
+				     size, flags);
+	if (rc)
+		return 0;
+
+	return size;
+}
+
+static struct iommu_ops s390_iommu_ops = {
+	.capable = s390_iommu_capable,
+	.domain_alloc = s390_domain_alloc,
+	.domain_free = s390_domain_free,
+	.attach_dev = s390_iommu_attach_device,
+	.detach_dev = s390_iommu_detach_device,
+	.map = s390_iommu_map,
+	.unmap = s390_iommu_unmap,
+	.iova_to_phys = s390_iommu_iova_to_phys,
+	.add_device = s390_iommu_add_device,
+	.remove_device = s390_iommu_remove_device,
+	.pgsize_bitmap = S390_IOMMU_PGSIZES,
+};
+
+static int __init s390_iommu_init(void)
+{
+	return bus_set_iommu(&pci_bus_type, &s390_iommu_ops);
+}
+subsys_initcall(s390_iommu_init);
-- 
cgit v1.2.3


From 3ca9299e7dc65f2d8242cd7804818a8e840b5a26 Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Fri, 2 Oct 2015 18:02:44 -0500
Subject: iommu/omap: Add support for configuring dsp iommus on DRA7xx

The DSP MMUs on DRA7xx SoC require configuring an additional
MMU_CONFIG register present in the DSP_SYSTEM sub module. This
setting dictates whether the DSP Core's MDMA and EDMA traffic
is routed through the respective MMU or not. Add the support
to the OMAP iommu driver so that the traffic is not bypassed
when enabling the MMUs.

The MMU_CONFIG register has two different bits for enabling
each of these two MMUs present in the DSP processor sub-system
on DRA7xx. An id field is added to the OMAP iommu object to
identify and enable each IOMMU. The id information and the
DSP_SYSTEM.MMU_CONFIG register programming is achieved through
the processing of the optional "ti,syscon-mmuconfig" property.
A proper value is assigned to the id field only when this
property is present.

Signed-off-by: Suman Anna <s-anna@ti.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/omap-iommu.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 drivers/iommu/omap-iommu.h |  9 +++++++
 2 files changed, 67 insertions(+)

(limited to 'drivers')

diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 36d0033c2ccb..3dc5b65f3990 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -26,6 +26,8 @@
 #include <linux/of_iommu.h>
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
 
 #include <asm/cacheflush.h>
 
@@ -112,6 +114,18 @@ void omap_iommu_restore_ctx(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx);
 
+static void dra7_cfg_dspsys_mmu(struct omap_iommu *obj, bool enable)
+{
+	u32 val, mask;
+
+	if (!obj->syscfg)
+		return;
+
+	mask = (1 << (obj->id * DSP_SYS_MMU_CONFIG_EN_SHIFT));
+	val = enable ? mask : 0;
+	regmap_update_bits(obj->syscfg, DSP_SYS_MMU_CONFIG, mask, val);
+}
+
 static void __iommu_set_twl(struct omap_iommu *obj, bool on)
 {
 	u32 l = iommu_read_reg(obj, MMU_CNTL);
@@ -147,6 +161,8 @@ static int omap2_iommu_enable(struct omap_iommu *obj)
 
 	iommu_write_reg(obj, pa, MMU_TTB);
 
+	dra7_cfg_dspsys_mmu(obj, true);
+
 	if (obj->has_bus_err_back)
 		iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG);
 
@@ -161,6 +177,7 @@ static void omap2_iommu_disable(struct omap_iommu *obj)
 
 	l &= ~MMU_CNTL_MASK;
 	iommu_write_reg(obj, l, MMU_CNTL);
+	dra7_cfg_dspsys_mmu(obj, false);
 
 	dev_dbg(obj->dev, "%s is shutting down\n", obj->name);
 }
@@ -864,6 +881,42 @@ static void omap_iommu_detach(struct omap_iommu *obj)
 	dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name);
 }
 
+static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev,
+					      struct omap_iommu *obj)
+{
+	struct device_node *np = pdev->dev.of_node;
+	int ret;
+
+	if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu"))
+		return 0;
+
+	if (!of_property_read_bool(np, "ti,syscon-mmuconfig")) {
+		dev_err(&pdev->dev, "ti,syscon-mmuconfig property is missing\n");
+		return -EINVAL;
+	}
+
+	obj->syscfg =
+		syscon_regmap_lookup_by_phandle(np, "ti,syscon-mmuconfig");
+	if (IS_ERR(obj->syscfg)) {
+		/* can fail with -EPROBE_DEFER */
+		ret = PTR_ERR(obj->syscfg);
+		return ret;
+	}
+
+	if (of_property_read_u32_index(np, "ti,syscon-mmuconfig", 1,
+				       &obj->id)) {
+		dev_err(&pdev->dev, "couldn't get the IOMMU instance id within subsystem\n");
+		return -EINVAL;
+	}
+
+	if (obj->id != 0 && obj->id != 1) {
+		dev_err(&pdev->dev, "invalid IOMMU instance id\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /*
  *	OMAP Device MMU(IOMMU) detection
  */
@@ -907,6 +960,10 @@ static int omap_iommu_probe(struct platform_device *pdev)
 	if (IS_ERR(obj->regbase))
 		return PTR_ERR(obj->regbase);
 
+	err = omap_iommu_dra7_get_dsp_system_cfg(pdev, obj);
+	if (err)
+		return err;
+
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
 		return -ENODEV;
@@ -943,6 +1000,7 @@ static const struct of_device_id omap_iommu_of_match[] = {
 	{ .compatible = "ti,omap2-iommu" },
 	{ .compatible = "ti,omap4-iommu" },
 	{ .compatible = "ti,dra7-iommu"	},
+	{ .compatible = "ti,dra7-dsp-iommu" },
 	{},
 };
 
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index a656df2f9e03..59628e5017b4 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -30,6 +30,7 @@ struct iotlb_entry {
 struct omap_iommu {
 	const char	*name;
 	void __iomem	*regbase;
+	struct regmap	*syscfg;
 	struct device	*dev;
 	struct iommu_domain *domain;
 	struct dentry	*debug_dir;
@@ -48,6 +49,7 @@ struct omap_iommu {
 	void *ctx; /* iommu context: registres saved area */
 
 	int has_bus_err_back;
+	u32 id;
 };
 
 struct cr_regs {
@@ -158,6 +160,13 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev)
 	 ((pgsz) == MMU_CAM_PGSZ_64K) ? 0xffff0000 :	\
 	 ((pgsz) == MMU_CAM_PGSZ_4K)  ? 0xfffff000 : 0)
 
+/*
+ * DSP_SYSTEM registers and bit definitions (applicable only for DRA7xx DSP)
+ */
+#define DSP_SYS_REVISION		0x00
+#define DSP_SYS_MMU_CONFIG		0x18
+#define DSP_SYS_MMU_CONFIG_EN_SHIFT	4
+
 /*
  * utilities for super page(16MB, 1MB, 64KB and 4KB)
  */
-- 
cgit v1.2.3


From 941a802d939221028baf6d19afd6dc8652219a81 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 11 Aug 2015 16:25:10 +0100
Subject: iommu/arm-smmu: Use drvdata instead of maintaining smmu_devices list

Rather than keeping a private list of struct arm_smmu_device and
searching it whenever we need to look up the correct SMMU instance, use
the drvdata field in the struct device to take care of the mapping for
us.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 42 +++++++++---------------------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index dafaf59dc3b8..637014ab8965 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/of_platform.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 
@@ -568,7 +569,6 @@ struct arm_smmu_device {
 	unsigned int			sid_bits;
 
 	struct arm_smmu_strtab_cfg	strtab_cfg;
-	struct list_head		list;
 };
 
 /* SMMU private data for an IOMMU group */
@@ -603,10 +603,6 @@ struct arm_smmu_domain {
 	struct iommu_domain		domain;
 };
 
-/* Our list of SMMU instances */
-static DEFINE_SPINLOCK(arm_smmu_devices_lock);
-static LIST_HEAD(arm_smmu_devices);
-
 struct arm_smmu_option_prop {
 	u32 opt;
 	const char *prop;
@@ -1722,7 +1718,8 @@ static void __arm_smmu_release_pci_iommudata(void *data)
 static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
 {
 	struct device_node *of_node;
-	struct arm_smmu_device *curr, *smmu = NULL;
+	struct platform_device *smmu_pdev;
+	struct arm_smmu_device *smmu = NULL;
 	struct pci_bus *bus = pdev->bus;
 
 	/* Walk up to the root bus */
@@ -1735,14 +1732,10 @@ static struct arm_smmu_device *arm_smmu_get_for_pci_dev(struct pci_dev *pdev)
 		return NULL;
 
 	/* See if we can find an SMMU corresponding to the phandle */
-	spin_lock(&arm_smmu_devices_lock);
-	list_for_each_entry(curr, &arm_smmu_devices, list) {
-		if (curr->dev->of_node == of_node) {
-			smmu = curr;
-			break;
-		}
-	}
-	spin_unlock(&arm_smmu_devices_lock);
+	smmu_pdev = of_find_device_by_node(of_node);
+	if (smmu_pdev)
+		smmu = platform_get_drvdata(smmu_pdev);
+
 	of_node_put(of_node);
 	return smmu;
 }
@@ -2609,10 +2602,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev)
 		goto out_free_structures;
 
 	/* Record our private device structure */
-	INIT_LIST_HEAD(&smmu->list);
-	spin_lock(&arm_smmu_devices_lock);
-	list_add(&smmu->list, &arm_smmu_devices);
-	spin_unlock(&arm_smmu_devices_lock);
+	platform_set_drvdata(pdev, smmu);
 	return 0;
 
 out_free_structures:
@@ -2622,21 +2612,7 @@ out_free_structures:
 
 static int arm_smmu_device_remove(struct platform_device *pdev)
 {
-	struct arm_smmu_device *curr, *smmu = NULL;
-	struct device *dev = &pdev->dev;
-
-	spin_lock(&arm_smmu_devices_lock);
-	list_for_each_entry(curr, &arm_smmu_devices, list) {
-		if (curr->dev == dev) {
-			smmu = curr;
-			list_del(&smmu->list);
-			break;
-		}
-	}
-	spin_unlock(&arm_smmu_devices_lock);
-
-	if (!smmu)
-		return -ENODEV;
+	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
 
 	arm_smmu_device_disable(smmu);
 	arm_smmu_free_structures(smmu);
-- 
cgit v1.2.3


From 077124c98da3aa5aaf498fd0980ed14eaf777c09 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@freescale.com>
Date: Tue, 18 Aug 2015 17:12:24 +0100
Subject: iommu/arm-smmu: Remove unneeded '0x' annotation

'%pad' automatically prints with '0x', so remove the explicit '0x'
annotation.

Signed-off-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 48a39dfa9777..677fba9e6b11 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1222,7 +1222,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
 				      !(tmp & ATSR_ACTIVE), 5, 50)) {
 		dev_err(dev,
-			"iova to phys timed out on 0x%pad. Falling back to software table walk.\n",
+			"iova to phys timed out on %pad. Falling back to software table walk.\n",
 			&iova);
 		return ops->iova_to_phys(ops, iova);
 	}
-- 
cgit v1.2.3


From 668b4ada1cdf406dac9f72503fa2f69f31bed0c5 Mon Sep 17 00:00:00 2001
From: Tirumalesh Chalamarla <tchalamarla@caviumnetworks.com>
Date: Wed, 19 Aug 2015 00:40:30 +0100
Subject: iommu/arm-smmu: ThunderX mis-extends 64bit registers

The SMMU architecture defines two different behaviors when 64-bit
registers are written with 32-bit writes.  The first behavior causes
zero extension into the upper 32-bits.  The second behavior splits a
64-bit register into "normal" 32-bit register pairs.

On some buggy implementations, registers are incorrectly zero extended
when they should instead behave as normal 32-bit register pairs.

Signed-off-by: Tirumalesh Chalamarla <tchalamarla@caviumnetworks.com>
[will: removed redundant macro parameters]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 50 ++++++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 677fba9e6b11..9aadf36a0747 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -70,6 +70,18 @@
 		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
 			? 0x400 : 0))
 
+#ifdef CONFIG_64BIT
+#define smmu_writeq	writeq_relaxed
+#else
+#define smmu_writeq(reg64, addr)				\
+	do {							\
+		u64 __val = (reg64);				\
+		void __iomem *__addr = (addr);			\
+		writel_relaxed(__val >> 32, __addr + 4);	\
+		writel_relaxed(__val, __addr);			\
+	} while (0)
+#endif
+
 /* Configuration registers */
 #define ARM_SMMU_GR0_sCR0		0x0
 #define sCR0_CLIENTPD			(1 << 0)
@@ -185,10 +197,8 @@
 #define ARM_SMMU_CB_SCTLR		0x0
 #define ARM_SMMU_CB_RESUME		0x8
 #define ARM_SMMU_CB_TTBCR2		0x10
-#define ARM_SMMU_CB_TTBR0_LO		0x20
-#define ARM_SMMU_CB_TTBR0_HI		0x24
-#define ARM_SMMU_CB_TTBR1_LO		0x28
-#define ARM_SMMU_CB_TTBR1_HI		0x2c
+#define ARM_SMMU_CB_TTBR0		0x20
+#define ARM_SMMU_CB_TTBR1		0x28
 #define ARM_SMMU_CB_TTBCR		0x30
 #define ARM_SMMU_CB_S1_MAIR0		0x38
 #define ARM_SMMU_CB_S1_MAIR1		0x3c
@@ -226,7 +236,7 @@
 #define TTBCR2_SEP_SHIFT		15
 #define TTBCR2_SEP_UPSTREAM		(0x7 << TTBCR2_SEP_SHIFT)
 
-#define TTBRn_HI_ASID_SHIFT            16
+#define TTBRn_ASID_SHIFT		48
 
 #define FSR_MULTI			(1 << 31)
 #define FSR_SS				(1 << 30)
@@ -695,6 +705,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
 	u32 reg;
+	u64 reg64;
 	bool stage1;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -738,22 +749,17 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 
 	/* TTBRs */
 	if (stage1) {
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0] >> 32;
-		reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_LO);
-		reg = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1] >> 32;
-		reg |= ARM_SMMU_CB_ASID(cfg) << TTBRn_HI_ASID_SHIFT;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1_HI);
+		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
+
+		reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+
+		reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
+		reg64 |= ((u64)ARM_SMMU_CB_ASID(cfg)) << TTBRn_ASID_SHIFT;
+		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR1);
 	} else {
-		reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-		reg = pgtbl_cfg->arm_lpae_s2_cfg.vttbr >> 32;
-		writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
+		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
+		smmu_writeq(reg64, cb_base + ARM_SMMU_CB_TTBR0);
 	}
 
 	/* TTBCR */
@@ -1212,11 +1218,9 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
 
 	/* ATS1 registers can only be written atomically */
 	va = iova & ~0xfffUL;
-#ifdef CONFIG_64BIT
 	if (smmu->version == ARM_SMMU_V2)
-		writeq_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
+		smmu_writeq(va, cb_base + ARM_SMMU_CB_ATS1PR);
 	else
-#endif
 		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);
 
 	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
-- 
cgit v1.2.3


From dfddb969edf021f21a45fc6fd019db4f99d12308 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 9 Oct 2015 18:16:46 -0400
Subject: iommu/vt-d: Switch from ioremap_cache to memremap

In preparation for deprecating ioremap_cache() convert its usage in
intel-iommu to memremap.  This also eliminates the mishandling of the
__iomem annotation in the implementation.

Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel-iommu.c         | 20 +++++++++++---------
 drivers/iommu/intel_irq_remapping.c |  8 ++++----
 2 files changed, 15 insertions(+), 13 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 6ac6e741c46e..c5426e847255 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -34,6 +34,7 @@
 #include <linux/mempool.h>
 #include <linux/memory.h>
 #include <linux/timer.h>
+#include <linux/io.h>
 #include <linux/iova.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
@@ -2809,18 +2810,18 @@ static void intel_iommu_init_qi(struct intel_iommu *iommu)
 }
 
 static int copy_context_table(struct intel_iommu *iommu,
-			      struct root_entry __iomem *old_re,
+			      struct root_entry *old_re,
 			      struct context_entry **tbl,
 			      int bus, bool ext)
 {
 	int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
-	struct context_entry __iomem *old_ce = NULL;
 	struct context_entry *new_ce = NULL, ce;
+	struct context_entry *old_ce = NULL;
 	struct root_entry re;
 	phys_addr_t old_ce_phys;
 
 	tbl_idx = ext ? bus * 2 : bus;
-	memcpy_fromio(&re, old_re, sizeof(re));
+	memcpy(&re, old_re, sizeof(re));
 
 	for (devfn = 0; devfn < 256; devfn++) {
 		/* First calculate the correct index */
@@ -2855,7 +2856,8 @@ static int copy_context_table(struct intel_iommu *iommu,
 			}
 
 			ret = -ENOMEM;
-			old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
+			old_ce = memremap(old_ce_phys, PAGE_SIZE,
+					MEMREMAP_WB);
 			if (!old_ce)
 				goto out;
 
@@ -2867,7 +2869,7 @@ static int copy_context_table(struct intel_iommu *iommu,
 		}
 
 		/* Now copy the context entry */
-		memcpy_fromio(&ce, old_ce + idx, sizeof(ce));
+		memcpy(&ce, old_ce + idx, sizeof(ce));
 
 		if (!__context_present(&ce))
 			continue;
@@ -2903,7 +2905,7 @@ static int copy_context_table(struct intel_iommu *iommu,
 	__iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
 
 out_unmap:
-	iounmap(old_ce);
+	memunmap(old_ce);
 
 out:
 	return ret;
@@ -2911,8 +2913,8 @@ out:
 
 static int copy_translation_tables(struct intel_iommu *iommu)
 {
-	struct root_entry __iomem *old_rt;
 	struct context_entry **ctxt_tbls;
+	struct root_entry *old_rt;
 	phys_addr_t old_rt_phys;
 	int ctxt_table_entries;
 	unsigned long flags;
@@ -2937,7 +2939,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
 	if (!old_rt_phys)
 		return -EINVAL;
 
-	old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
+	old_rt = memremap(old_rt_phys, PAGE_SIZE, MEMREMAP_WB);
 	if (!old_rt)
 		return -ENOMEM;
 
@@ -2986,7 +2988,7 @@ static int copy_translation_tables(struct intel_iommu *iommu)
 	ret = 0;
 
 out_unmap:
-	iounmap(old_rt);
+	memunmap(old_rt);
 
 	return ret;
 }
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 9ec4e0d94ffd..bdc52cc6ed23 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -384,7 +384,7 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev)
 
 static int iommu_load_old_irte(struct intel_iommu *iommu)
 {
-	struct irte __iomem *old_ir_table;
+	struct irte *old_ir_table;
 	phys_addr_t irt_phys;
 	unsigned int i;
 	size_t size;
@@ -408,12 +408,12 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
 	size     = INTR_REMAP_TABLE_ENTRIES*sizeof(struct irte);
 
 	/* Map the old IR table */
-	old_ir_table = ioremap_cache(irt_phys, size);
+	old_ir_table = memremap(irt_phys, size, MEMREMAP_WB);
 	if (!old_ir_table)
 		return -ENOMEM;
 
 	/* Copy data over */
-	memcpy_fromio(iommu->ir_table->base, old_ir_table, size);
+	memcpy(iommu->ir_table->base, old_ir_table, size);
 
 	__iommu_flush_cache(iommu, iommu->ir_table->base, size);
 
@@ -426,7 +426,7 @@ static int iommu_load_old_irte(struct intel_iommu *iommu)
 			bitmap_set(iommu->ir_table->bitmap, i, 1);
 	}
 
-	iounmap(old_ir_table);
+	memunmap(old_ir_table);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 344cb4e0b6f3a0dbef0643eacb4946338eb228c0 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@intel.com>
Date: Thu, 15 Oct 2015 10:19:11 +0800
Subject: iommu/vt-d: Use cmpxchg16b to update posted format IRTE atomically

If the IRTE is in posted format, the 'pda' field crosses the 64-bit
boundary, so we need to use cmpxchg16b to update it atomically. We only
expose posted interrupts when X86_FEATURE_CX16 is supported, and use
cmpxchg16b to update the IRTE atomically.

Signed-off-by: Feng Wu <feng.wu@intel.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel_irq_remapping.c | 33 ++++++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index bdc52cc6ed23..a50468e7d8c2 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -169,8 +169,26 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-	set_64bit(&irte->low, irte_modified->low);
-	set_64bit(&irte->high, irte_modified->high);
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
+	if ((irte->pst == 1) || (irte_modified->pst == 1)) {
+		bool ret;
+
+		ret = cmpxchg_double(&irte->low, &irte->high,
+				     irte->low, irte->high,
+				     irte_modified->low, irte_modified->high);
+		/*
+		 * We use cmpxchg16b to atomically update the 128-bit IRTE.
+		 * Neither the hardware nor other processors can modify it
+		 * behind our back, so the compare must match the old value
+		 * and cmpxchg_double() is expected to succeed (return true).
+		 */
+		WARN_ON(!ret);
+	} else
+#endif
+	{
+		set_64bit(&irte->low, irte_modified->low);
+		set_64bit(&irte->high, irte_modified->high);
+	}
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 
 	rc = qi_flush_iec(iommu, index, 0);
@@ -727,7 +745,16 @@ static inline void set_irq_posting_cap(void)
 	struct intel_iommu *iommu;
 
 	if (!disable_irq_post) {
-		intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+		/*
+		 * If the IRTE is in posted format, the 'pda' field crosses the
+		 * 64-bit boundary, so we need to use cmpxchg16b to update it
+		 * atomically. We only expose posted interrupts when
+		 * X86_FEATURE_CX16 is supported. In practice, hardware
+		 * platforms supporting PI should have X86_FEATURE_CX16; this
+		 * has been confirmed with the Intel hardware engineers.
+		 */
+		if ( cpu_has_cx16 )
+			intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
 
 		for_each_iommu(iommu, drhd)
 			if (!cap_pi_support(iommu->cap)) {
-- 
cgit v1.2.3


From 0db2e5d18f76a66ca945447d9f610bed0a94ca5a Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 1 Oct 2015 20:13:58 +0100
Subject: iommu: Implement common IOMMU ops for DMA mapping

Taking inspiration from the existing arch/arm code, break out some
generic functions to interface the DMA-API to the IOMMU-API. This will
do the bulk of the heavy lifting for IOMMU-backed dma-mapping.

Since associating an IOVA allocator with an IOMMU domain is a fairly
common need, rather than introduce yet another private structure just to
do this for ourselves, extend the top-level struct iommu_domain with the
notion. A simple opaque cookie allows reuse by other IOMMU API users
with their various different incompatible allocator types.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Kconfig     |   7 +
 drivers/iommu/Makefile    |   1 +
 drivers/iommu/dma-iommu.c | 524 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 532 insertions(+)
 create mode 100644 drivers/iommu/dma-iommu.c

(limited to 'drivers')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9da766719c8..7414f33acfba 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -49,6 +49,13 @@ config OF_IOMMU
        def_bool y
        depends on OF && IOMMU_API
 
+# IOMMU-agnostic DMA-mapping layer
+config IOMMU_DMA
+	bool
+	depends on NEED_SG_DMA_LENGTH
+	select IOMMU_API
+	select IOMMU_IOVA
+
 config FSL_PAMU
 	bool "Freescale IOMMU support"
 	depends on PPC32
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index c6dcc513d711..f465cfbdb183 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
+obj-$(CONFIG_IOMMU_DMA) += dma-iommu.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE) += io-pgtable.o
 obj-$(CONFIG_IOMMU_IO_PGTABLE_LPAE) += io-pgtable-arm.o
 obj-$(CONFIG_IOMMU_IOVA) += iova.o
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
new file mode 100644
index 000000000000..3a20db4f8604
--- /dev/null
+++ b/drivers/iommu/dma-iommu.c
@@ -0,0 +1,524 @@
+/*
+ * A fairly generic DMA-API to IOMMU-API glue layer.
+ *
+ * Copyright (C) 2014-2015 ARM Ltd.
+ *
+ * based in part on arch/arm/mm/dma-mapping.c:
+ * Copyright (C) 2000-2004 Russell King
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/dma-iommu.h>
+#include <linux/huge_mm.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/mm.h>
+
+int iommu_dma_init(void)
+{
+	return iova_cache_get();
+}
+
+/**
+ * iommu_get_dma_cookie - Acquire DMA-API resources for a domain
+ * @domain: IOMMU domain to prepare for DMA-API usage
+ *
+ * IOMMU drivers should normally call this from their domain_alloc
+ * callback when domain->type == IOMMU_DOMAIN_DMA.
+ */
+int iommu_get_dma_cookie(struct iommu_domain *domain)
+{
+	struct iova_domain *iovad;
+
+	if (domain->iova_cookie)
+		return -EEXIST;
+
+	iovad = kzalloc(sizeof(*iovad), GFP_KERNEL);
+	domain->iova_cookie = iovad;
+
+	return iovad ? 0 : -ENOMEM;
+}
+EXPORT_SYMBOL(iommu_get_dma_cookie);
+
+/**
+ * iommu_put_dma_cookie - Release a domain's DMA mapping resources
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ *
+ * IOMMU drivers should normally call this from their domain_free callback.
+ */
+void iommu_put_dma_cookie(struct iommu_domain *domain)
+{
+	struct iova_domain *iovad = domain->iova_cookie;
+
+	if (!iovad)
+		return;
+
+	put_iova_domain(iovad);
+	kfree(iovad);
+	domain->iova_cookie = NULL;
+}
+EXPORT_SYMBOL(iommu_put_dma_cookie);
+
+/**
+ * iommu_dma_init_domain - Initialise a DMA mapping domain
+ * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
+ * @base: IOVA at which the mappable address space starts
+ * @size: Size of IOVA space
+ *
+ * @base and @size should be exact multiples of IOMMU page granularity to
+ * avoid rounding surprises. If necessary, we reserve the page at address 0
+ * to ensure it is an invalid IOVA. It is safe to reinitialise a domain, but
+ * any change which could make prior IOVAs invalid will fail.
+ */
+int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size)
+{
+	struct iova_domain *iovad = domain->iova_cookie;
+	unsigned long order, base_pfn, end_pfn;
+
+	if (!iovad)
+		return -ENODEV;
+
+	/* Use the smallest supported page size for IOVA granularity */
+	order = __ffs(domain->ops->pgsize_bitmap);
+	base_pfn = max_t(unsigned long, 1, base >> order);
+	end_pfn = (base + size - 1) >> order;
+
+	/* Check the domain allows at least some access to the device... */
+	if (domain->geometry.force_aperture) {
+		if (base > domain->geometry.aperture_end ||
+		    base + size <= domain->geometry.aperture_start) {
+			pr_warn("specified DMA range outside IOMMU capability\n");
+			return -EFAULT;
+		}
+		/* ...then finally give it a kicking to make sure it fits */
+		base_pfn = max_t(unsigned long, base_pfn,
+				domain->geometry.aperture_start >> order);
+		end_pfn = min_t(unsigned long, end_pfn,
+				domain->geometry.aperture_end >> order);
+	}
+
+	/* All we can safely do with an existing domain is enlarge it */
+	if (iovad->start_pfn) {
+		if (1UL << order != iovad->granule ||
+		    base_pfn != iovad->start_pfn ||
+		    end_pfn < iovad->dma_32bit_pfn) {
+			pr_warn("Incompatible range for DMA domain\n");
+			return -EFAULT;
+		}
+		iovad->dma_32bit_pfn = end_pfn;
+	} else {
+		init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(iommu_dma_init_domain);
+
+/**
+ * dma_direction_to_prot - Translate DMA API directions to IOMMU API page flags
+ * @dir: Direction of DMA transfer
+ * @coherent: Is the DMA master cache-coherent?
+ *
+ * Return: corresponding IOMMU API page protection flags
+ */
+int dma_direction_to_prot(enum dma_data_direction dir, bool coherent)
+{
+	int prot = coherent ? IOMMU_CACHE : 0;
+
+	switch (dir) {
+	case DMA_BIDIRECTIONAL:
+		return prot | IOMMU_READ | IOMMU_WRITE;
+	case DMA_TO_DEVICE:
+		return prot | IOMMU_READ;
+	case DMA_FROM_DEVICE:
+		return prot | IOMMU_WRITE;
+	default:
+		return 0;
+	}
+}
+
+static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size,
+		dma_addr_t dma_limit)
+{
+	unsigned long shift = iova_shift(iovad);
+	unsigned long length = iova_align(iovad, size) >> shift;
+
+	/*
+	 * Enforce size-alignment to be safe - there could perhaps be an
+	 * attribute to control this per-device, or at least per-domain...
+	 */
+	return alloc_iova(iovad, length, dma_limit >> shift, true);
+}
+
+/* The IOVA allocator knows what we mapped, so just unmap whatever that was */
+static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr)
+{
+	struct iova_domain *iovad = domain->iova_cookie;
+	unsigned long shift = iova_shift(iovad);
+	unsigned long pfn = dma_addr >> shift;
+	struct iova *iova = find_iova(iovad, pfn);
+	size_t size;
+
+	if (WARN_ON(!iova))
+		return;
+
+	size = iova_size(iova) << shift;
+	size -= iommu_unmap(domain, pfn << shift, size);
+	/* ...and if we can't, then something is horribly, horribly wrong */
+	WARN_ON(size > 0);
+	__free_iova(iovad, iova);
+}
+
+static void __iommu_dma_free_pages(struct page **pages, int count)
+{
+	while (count--)
+		__free_page(pages[count]);
+	kvfree(pages);
+}
+
+static struct page **__iommu_dma_alloc_pages(unsigned int count, gfp_t gfp)
+{
+	struct page **pages;
+	unsigned int i = 0, array_size = count * sizeof(*pages);
+
+	if (array_size <= PAGE_SIZE)
+		pages = kzalloc(array_size, GFP_KERNEL);
+	else
+		pages = vzalloc(array_size);
+	if (!pages)
+		return NULL;
+
+	/* IOMMU can map any pages, so highmem can also be used here */
+	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
+
+	while (count) {
+		struct page *page = NULL;
+		int j, order = __fls(count);
+
+		/*
+		 * Higher-order allocations are a convenience rather
+		 * than a necessity, hence using __GFP_NORETRY until
+		 * falling back to single-page allocations.
+		 */
+		for (order = min(order, MAX_ORDER); order > 0; order--) {
+			page = alloc_pages(gfp | __GFP_NORETRY, order);
+			if (!page)
+				continue;
+			if (PageCompound(page)) {
+				if (!split_huge_page(page))
+					break;
+				__free_pages(page, order);
+			} else {
+				split_page(page, order);
+				break;
+			}
+		}
+		if (!page)
+			page = alloc_page(gfp);
+		if (!page) {
+			__iommu_dma_free_pages(pages, i);
+			return NULL;
+		}
+		j = 1 << order;
+		count -= j;
+		while (j--)
+			pages[i++] = page++;
+	}
+	return pages;
+}
+
+/**
+ * iommu_dma_free - Free a buffer allocated by iommu_dma_alloc()
+ * @dev: Device which owns this buffer
+ * @pages: Array of buffer pages as returned by iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @handle: DMA address of buffer
+ *
+ * Frees both the pages associated with the buffer, and the array
+ * describing them
+ */
+void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
+		dma_addr_t *handle)
+{
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle);
+	__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+	*handle = DMA_ERROR_CODE;
+}
+
+/**
+ * iommu_dma_alloc - Allocate and map a buffer contiguous in IOVA space
+ * @dev: Device to allocate memory for. Must be a real device
+ *	 attached to an iommu_dma_domain
+ * @size: Size of buffer in bytes
+ * @gfp: Allocation flags
+ * @prot: IOMMU mapping flags
+ * @handle: Out argument for allocated DMA handle
+ * @flush_page: Arch callback which must ensure PAGE_SIZE bytes from the
+ *		given VA/PA are visible to the given non-coherent device.
+ *
+ * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+ * but an IOMMU which supports smaller pages might not map the whole thing.
+ *
+ * Return: Array of struct page pointers describing the buffer,
+ *	   or NULL on failure.
+ */
+struct page **iommu_dma_alloc(struct device *dev, size_t size,
+		gfp_t gfp, int prot, dma_addr_t *handle,
+		void (*flush_page)(struct device *, const void *, phys_addr_t))
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova *iova;
+	struct page **pages;
+	struct sg_table sgt;
+	dma_addr_t dma_addr;
+	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	*handle = DMA_ERROR_CODE;
+
+	pages = __iommu_dma_alloc_pages(count, gfp);
+	if (!pages)
+		return NULL;
+
+	iova = __alloc_iova(iovad, size, dev->coherent_dma_mask);
+	if (!iova)
+		goto out_free_pages;
+
+	size = iova_align(iovad, size);
+	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
+		goto out_free_iova;
+
+	if (!(prot & IOMMU_CACHE)) {
+		struct sg_mapping_iter miter;
+		/*
+		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
+		 * sufficient here, so skip it by using the "wrong" direction.
+		 */
+		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents, SG_MITER_FROM_SG);
+		while (sg_miter_next(&miter))
+			flush_page(dev, miter.addr, page_to_phys(miter.page));
+		sg_miter_stop(&miter);
+	}
+
+	dma_addr = iova_dma_addr(iovad, iova);
+	if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot)
+			< size)
+		goto out_free_sg;
+
+	*handle = dma_addr;
+	sg_free_table(&sgt);
+	return pages;
+
+out_free_sg:
+	sg_free_table(&sgt);
+out_free_iova:
+	__free_iova(iovad, iova);
+out_free_pages:
+	__iommu_dma_free_pages(pages, count);
+	return NULL;
+}
+
+/**
+ * iommu_dma_mmap - Map a buffer into provided user VMA
+ * @pages: Array representing buffer from iommu_dma_alloc()
+ * @size: Size of buffer in bytes
+ * @vma: VMA describing requested userspace mapping
+ *
+ * Maps the pages of the buffer in @pages into @vma. The caller is responsible
+ * for verifying the correct size and protection of @vma beforehand.
+ */
+
+int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma)
+{
+	unsigned long uaddr = vma->vm_start;
+	unsigned int i, count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	int ret = -ENXIO;
+
+	for (i = vma->vm_pgoff; i < count && uaddr < vma->vm_end; i++) {
+		ret = vm_insert_page(vma, uaddr, pages[i]);
+		if (ret)
+			break;
+		uaddr += PAGE_SIZE;
+	}
+	return ret;
+}
+
+dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
+		unsigned long offset, size_t size, int prot)
+{
+	dma_addr_t dma_addr;
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iova_domain *iovad = domain->iova_cookie;
+	phys_addr_t phys = page_to_phys(page) + offset;
+	size_t iova_off = iova_offset(iovad, phys);
+	size_t len = iova_align(iovad, size + iova_off);
+	struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev));
+
+	if (!iova)
+		return DMA_ERROR_CODE;
+
+	dma_addr = iova_dma_addr(iovad, iova);
+	if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) {
+		__free_iova(iovad, iova);
+		return DMA_ERROR_CODE;
+	}
+	return dma_addr + iova_off;
+}
+
+void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
+		enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle);
+}
+
+/*
+ * Prepare a successfully-mapped scatterlist to give back to the caller.
+ * Handling IOVA concatenation can come later, if needed
+ */
+static int __finalise_sg(struct device *dev, struct scatterlist *sg, int nents,
+		dma_addr_t dma_addr)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		/* Un-swizzling the fields here, hence the naming mismatch */
+		unsigned int s_offset = sg_dma_address(s);
+		unsigned int s_length = sg_dma_len(s);
+		unsigned int s_dma_len = s->length;
+
+		s->offset = s_offset;
+		s->length = s_length;
+		sg_dma_address(s) = dma_addr + s_offset;
+		dma_addr += s_dma_len;
+	}
+	return i;
+}
+
+/*
+ * If mapping failed, then just restore the original list,
+ * but making sure the DMA fields are invalidated.
+ */
+static void __invalidate_sg(struct scatterlist *sg, int nents)
+{
+	struct scatterlist *s;
+	int i;
+
+	for_each_sg(sg, s, nents, i) {
+		if (sg_dma_address(s) != DMA_ERROR_CODE)
+			s->offset = sg_dma_address(s);
+		if (sg_dma_len(s))
+			s->length = sg_dma_len(s);
+		sg_dma_address(s) = DMA_ERROR_CODE;
+		sg_dma_len(s) = 0;
+	}
+}
+
+/*
+ * The DMA API client is passing in a scatterlist which could describe
+ * any old buffer layout, but the IOMMU API requires everything to be
+ * aligned to IOMMU pages. Hence the need for this complicated bit of
+ * impedance-matching, to be able to hand off a suitably-aligned list,
+ * but still preserve the original offsets and sizes for the caller.
+ */
+int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
+		int nents, int prot)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+	struct iova_domain *iovad = domain->iova_cookie;
+	struct iova *iova;
+	struct scatterlist *s, *prev = NULL;
+	dma_addr_t dma_addr;
+	size_t iova_len = 0;
+	int i;
+
+	/*
+	 * Work out how much IOVA space we need, and align the segments to
+	 * IOVA granules for the IOMMU driver to handle. With some clever
+	 * trickery we can modify the list in-place, but reversibly, by
+	 * hiding the original data in the as-yet-unused DMA fields.
+	 */
+	for_each_sg(sg, s, nents, i) {
+		size_t s_offset = iova_offset(iovad, s->offset);
+		size_t s_length = s->length;
+
+		sg_dma_address(s) = s->offset;
+		sg_dma_len(s) = s_length;
+		s->offset -= s_offset;
+		s_length = iova_align(iovad, s_length + s_offset);
+		s->length = s_length;
+
+		/*
+		 * The simple way to avoid the rare case of a segment
+		 * crossing the boundary mask is to pad the previous one
+		 * to end at a naturally-aligned IOVA for this one's size,
+		 * at the cost of potentially over-allocating a little.
+		 */
+		if (prev) {
+			size_t pad_len = roundup_pow_of_two(s_length);
+
+			pad_len = (pad_len - iova_len) & (pad_len - 1);
+			prev->length += pad_len;
+			iova_len += pad_len;
+		}
+
+		iova_len += s_length;
+		prev = s;
+	}
+
+	iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev));
+	if (!iova)
+		goto out_restore_sg;
+
+	/*
+	 * We'll leave any physical concatenation to the IOMMU driver's
+	 * implementation - it knows better than we do.
+	 */
+	dma_addr = iova_dma_addr(iovad, iova);
+	if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len)
+		goto out_free_iova;
+
+	return __finalise_sg(dev, sg, nents, dma_addr);
+
+out_free_iova:
+	__free_iova(iovad, iova);
+out_restore_sg:
+	__invalidate_sg(sg, nents);
+	return 0;
+}
+
+void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
+		enum dma_data_direction dir, struct dma_attrs *attrs)
+{
+	/*
+	 * The scatterlist segments are mapped into a single
+	 * contiguous IOVA allocation, so this is incredibly easy.
+	 */
+	__iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg));
+}
+
+int iommu_dma_supported(struct device *dev, u64 mask)
+{
+	/*
+	 * 'Special' IOMMUs which don't have the same addressing capability
+	 * as the CPU will have to wait until we have some way to query that
+	 * before they'll be able to use this framework.
+	 */
+	return 1;
+}
+
+int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+	return dma_addr == DMA_ERROR_CODE;
+}
-- 
cgit v1.2.3


From c0733a2cf30c1e7923b6ad4f8df67941502923de Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 13 Oct 2015 17:51:14 +0100
Subject: iommu/arm-smmu: Fix error checking for ASID and VMID allocation

The bitmap allocator returns an int, which is one of the standard
negative values on failure. Rather than assigning this straight to a
u16 (like we do for the ASID and VMID callers), which means that we
won't detect failure correctly, use an int for the purposes of error
checking.

Cc: <stable@vger.kernel.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu-v3.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 637014ab8965..970d78dcac53 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1419,7 +1419,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
 	int ret;
-	u16 asid;
+	int asid;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
 
@@ -1431,10 +1431,11 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
 					 &cfg->cdptr_dma, GFP_KERNEL);
 	if (!cfg->cdptr) {
 		dev_warn(smmu->dev, "failed to allocate context descriptor\n");
+		ret = -ENOMEM;
 		goto out_free_asid;
 	}
 
-	cfg->cd.asid	= asid;
+	cfg->cd.asid	= (u16)asid;
 	cfg->cd.ttbr	= pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
 	cfg->cd.tcr	= pgtbl_cfg->arm_lpae_s1_cfg.tcr;
 	cfg->cd.mair	= pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
@@ -1448,7 +1449,7 @@ out_free_asid:
 static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
 				       struct io_pgtable_cfg *pgtbl_cfg)
 {
-	u16 vmid;
+	int vmid;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
 
@@ -1456,7 +1457,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
 	if (IS_ERR_VALUE(vmid))
 		return vmid;
 
-	cfg->vmid	= vmid;
+	cfg->vmid	= (u16)vmid;
 	cfg->vttbr	= pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
 	cfg->vtcr	= pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
 	return 0;
-- 
cgit v1.2.3


From c88ae5de71629e4b5e4075897d9980a7b079d122 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 13 Oct 2015 17:53:24 +0100
Subject: iommu/arm-smmu: Remove redundant calculation of gr0 base address

Since commit 1463fe44fd0f ("iommu/arm-smmu: Don't use VMIDs for stage-1
translations"), we don't need the GR0 base address when initialising a
context bank, so remove the useless local variable and its init code.

Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/arm-smmu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 9aadf36a0747..7c20a68b5a95 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -709,9 +709,8 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
 	bool stage1;
 	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
-	void __iomem *cb_base, *gr0_base, *gr1_base;
+	void __iomem *cb_base, *gr1_base;
 
-	gr0_base = ARM_SMMU_GR0(smmu);
 	gr1_base = ARM_SMMU_GR1(smmu);
 	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
 	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
-- 
cgit v1.2.3


From 166bdbd23161160f2abcea70621adba179050bee Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 13 Oct 2015 18:32:30 +0100
Subject: iommu/arm-smmu: Add support for MSI on SMMUv3

Despite being a platform device, the SMMUv3 is capable of signaling
interrupts using MSIs. Hook it into the platform MSI framework and
enjoy faults being reported in a new and exciting way.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
[will: tidied up the binding example and reworked most of the code]
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 drivers/iommu/Kconfig       |   1 +
 drivers/iommu/arm-smmu-v3.c | 105 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 98 insertions(+), 8 deletions(-)

(limited to 'drivers')

diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d9da766719c8..cc2dfa572d07 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -362,6 +362,7 @@ config ARM_SMMU_V3
 	depends on ARM64 && PCI
 	select IOMMU_API
 	select IOMMU_IO_PGTABLE_LPAE
+	select GENERIC_MSI_IRQ_DOMAIN
 	help
 	  Support for implementations of the ARM System MMU architecture
 	  version 3 providing translation support to a PCIe root complex.
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 970d78dcac53..5a36ef70f6f4 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -26,6 +26,7 @@
 #include <linux/iommu.h>
 #include <linux/iopoll.h>
 #include <linux/module.h>
+#include <linux/msi.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
@@ -402,6 +403,31 @@ enum pri_resp {
 	PRI_RESP_SUCC,
 };
 
+enum arm_smmu_msi_index {
+	EVTQ_MSI_INDEX,
+	GERROR_MSI_INDEX,
+	PRIQ_MSI_INDEX,
+	ARM_SMMU_MAX_MSIS,
+};
+
+static phys_addr_t arm_smmu_msi_cfg[ARM_SMMU_MAX_MSIS][3] = {
+	[EVTQ_MSI_INDEX] = {
+		ARM_SMMU_EVTQ_IRQ_CFG0,
+		ARM_SMMU_EVTQ_IRQ_CFG1,
+		ARM_SMMU_EVTQ_IRQ_CFG2,
+	},
+	[GERROR_MSI_INDEX] = {
+		ARM_SMMU_GERROR_IRQ_CFG0,
+		ARM_SMMU_GERROR_IRQ_CFG1,
+		ARM_SMMU_GERROR_IRQ_CFG2,
+	},
+	[PRIQ_MSI_INDEX] = {
+		ARM_SMMU_PRIQ_IRQ_CFG0,
+		ARM_SMMU_PRIQ_IRQ_CFG1,
+		ARM_SMMU_PRIQ_IRQ_CFG2,
+	},
+};
+
 struct arm_smmu_cmdq_ent {
 	/* Common fields */
 	u8				opcode;
@@ -2176,6 +2202,72 @@ static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
 					  1, ARM_SMMU_POLL_TIMEOUT_US);
 }
 
+static void arm_smmu_free_msis(void *data)
+{
+	struct device *dev = data;
+	platform_msi_domain_free_irqs(dev);
+}
+
+static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
+{
+	phys_addr_t doorbell;
+	struct device *dev = msi_desc_to_dev(desc);
+	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+	phys_addr_t *cfg = arm_smmu_msi_cfg[desc->platform.msi_index];
+
+	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
+	doorbell &= MSI_CFG0_ADDR_MASK << MSI_CFG0_ADDR_SHIFT;
+
+	writeq_relaxed(doorbell, smmu->base + cfg[0]);
+	writel_relaxed(msg->data, smmu->base + cfg[1]);
+	writel_relaxed(MSI_CFG2_MEMATTR_DEVICE_nGnRE, smmu->base + cfg[2]);
+}
+
+static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+{
+	struct msi_desc *desc;
+	int ret, nvec = ARM_SMMU_MAX_MSIS;
+	struct device *dev = smmu->dev;
+
+	/* Clear the MSI address regs */
+	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
+	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+
+	if (smmu->features & ARM_SMMU_FEAT_PRI)
+		writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
+	else
+		nvec--;
+
+	if (!(smmu->features & ARM_SMMU_FEAT_MSI))
+		return;
+
+	/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
+	ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
+	if (ret) {
+		dev_warn(dev, "failed to allocate MSIs\n");
+		return;
+	}
+
+	for_each_msi_entry(desc, dev) {
+		switch (desc->platform.msi_index) {
+		case EVTQ_MSI_INDEX:
+			smmu->evtq.q.irq = desc->irq;
+			break;
+		case GERROR_MSI_INDEX:
+			smmu->gerr_irq = desc->irq;
+			break;
+		case PRIQ_MSI_INDEX:
+			smmu->priq.q.irq = desc->irq;
+			break;
+		default:	/* Unknown */
+			continue;
+		}
+	}
+
+	/* Add callback to free MSIs on teardown */
+	devm_add_action(dev, arm_smmu_free_msis, dev);
+}
+
 static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 {
 	int ret, irq;
@@ -2189,11 +2281,9 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 		return ret;
 	}
 
-	/* Clear the MSI address regs */
-	writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
-	writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
+	arm_smmu_setup_msis(smmu);
 
-	/* Request wired interrupt lines */
+	/* Request interrupt lines */
 	irq = smmu->evtq.q.irq;
 	if (irq) {
 		ret = devm_request_threaded_irq(smmu->dev, irq,
@@ -2222,8 +2312,6 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
 	}