summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-26 10:50:10 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-26 10:50:10 -0700
commitd1f2b1710d92a80d60351503bbf41cdac95ed7a8 (patch)
treecc8247f2917c38b476b8777c29e3c9a154445d9e
parent18d0eae30e6a4f8644d589243d7ac1d70d29203d (diff)
parent2f2fbfb71ecc221352d84ae6430b42031ae5b654 (diff)
Merge tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel: - Debugfs support for the Intel VT-d driver. When enabled, it now also exposes some of its internal data structures to user-space for debugging purposes. - ARM-SMMU driver now uses the generic deferred flushing and fast-path iova allocation code. This is expected to be a major performance improvement, as this allocation path scales a lot better. - Support for r8a7744 in the Renesas iommu driver - Couple of minor fixes and improvements all over the place * tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (39 commits) iommu/arm-smmu-v3: Remove unnecessary wrapper function iommu/arm-smmu-v3: Add SPDX header iommu/amd: Add default branch in amd_iommu_capable() dt-bindings: iommu: ipmmu-vmsa: Add r8a7744 support iommu/amd: Move iommu_init_pci() to .init section iommu/arm-smmu: Support non-strict mode iommu/io-pgtable-arm-v7s: Add support for non-strict mode iommu/arm-smmu-v3: Add support for non-strict mode iommu/io-pgtable-arm: Add support for non-strict mode iommu: Add "iommu.strict" command line option iommu/dma: Add support for non-strict mode iommu/arm-smmu: Ensure that page-table updates are visible before TLBI iommu/arm-smmu-v3: Implement flush_iotlb_all hook iommu/arm-smmu-v3: Avoid back-to-back CMD_SYNC operations iommu/arm-smmu-v3: Fix unexpected CMD_SYNC timeout iommu/io-pgtable-arm: Fix race handling in split_blk_unmap() iommu/arm-smmu-v3: Fix a couple of minor comment typos iommu: Fix a typo iommu: Remove .domain_{get,set}_windows iommu: Tidy up window attributes ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt12
-rw-r--r--Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt1
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt39
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi7
-rw-r--r--arch/arm64/mm/dma-mapping.c10
-rw-r--r--arch/x86/include/asm/irq_remapping.h2
-rw-r--r--drivers/bus/fsl-mc/fsl-mc-bus.c16
-rw-r--r--drivers/iommu/Kconfig13
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd_iommu.c2
-rw-r--r--drivers/iommu/amd_iommu_init.c2
-rw-r--r--drivers/iommu/arm-smmu-v3.c140
-rw-r--r--drivers/iommu/arm-smmu.c106
-rw-r--r--drivers/iommu/dma-iommu.c55
-rw-r--r--drivers/iommu/fsl_pamu_domain.c119
-rw-r--r--drivers/iommu/intel-iommu-debugfs.c314
-rw-r--r--drivers/iommu/intel-iommu.c32
-rw-r--r--drivers/iommu/intel_irq_remapping.c2
-rw-r--r--drivers/iommu/io-pgtable-arm-v7s.c11
-rw-r--r--drivers/iommu/io-pgtable-arm.c23
-rw-r--r--drivers/iommu/io-pgtable.h5
-rw-r--r--drivers/iommu/iommu.c58
-rw-r--r--drivers/iommu/iova.c22
-rw-r--r--drivers/iommu/ipmmu-vmsa.c5
-rw-r--r--drivers/iommu/of_iommu.c25
-rw-r--r--drivers/of/base.c102
-rw-r--r--drivers/of/irq.c5
-rw-r--r--drivers/pci/of.c101
-rw-r--r--include/linux/fsl/mc.h8
-rw-r--r--include/linux/intel-iommu.h72
-rw-r--r--include/linux/iommu.h10
-rw-r--r--include/linux/iova.h1
-rw-r--r--include/linux/of.h11
-rw-r--r--include/linux/of_pci.h10
34 files changed, 986 insertions, 356 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8022d902e770..5b5f1ba76bba 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1759,6 +1759,18 @@
nobypass [PPC/POWERNV]
Disable IOMMU bypass, using IOMMU for PCI devices.
+ iommu.strict= [ARM64] Configure TLB invalidation behaviour
+ Format: { "0" | "1" }
+ 0 - Lazy mode.
+ Request that DMA unmap operations use deferred
+ invalidation of hardware TLBs, for increased
+ throughput at the cost of reduced device isolation.
+ Will fall back to strict mode if not supported by
+ the relevant IOMMU driver.
+ 1 - Strict mode (default).
+ DMA unmap operations invalidate IOMMU hardware TLBs
+ synchronously.
+
iommu.passthrough=
[ARM64] Configure DMA to bypass the IOMMU by default.
Format: { "0" | "1" }
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
index c6e2d855fe13..377ee639d103 100644
--- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -12,6 +12,7 @@ Required Properties:
- "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
- "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+ - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
- "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
- "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
- "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
index 6611a7c2053a..01fdc33a41d0 100644
--- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices
such as network interfaces, crypto accelerator instances, L2 switches,
etc.
+For an overview of the DPAA2 architecture and fsl-mc bus see:
+Documentation/networking/dpaa2/overview.rst
+
+As described in the above overview, all DPAA2 objects in a DPRC share the
+same hardware "isolation context" and a 10-bit value called an ICID
+(isolation context id) is expressed by the hardware to identify
+the requester.
+
+The generic 'iommus' property is insufficient to describe the relationship
+between ICIDs and IOMMUs, so an iommu-map property is used to define
+the set of possible ICIDs under a root DPRC and how they map to
+an IOMMU.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+For arm-smmu binding, see:
+Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+
Required properties:
- compatible
@@ -88,14 +107,34 @@ Sub-nodes:
Value type: <phandle>
Definition: Specifies the phandle to the PHY device node associated
with the this dpmac.
+Optional properties:
+
+- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
+ data.
+
+ The property is an arbitrary number of tuples of
+ (icid-base,iommu,iommu-base,length).
+
+ Any ICID i in the interval [icid-base, icid-base + length) is
+ associated with the listed IOMMU, with the iommu-specifier
+ (i - icid-base + iommu-base).
Example:
+ smmu: iommu@5000000 {
+ compatible = "arm,mmu-500";
+ #iommu-cells = <1>;
+ stream-match-mask = <0x7C00>;
+ ...
+ };
+
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
msi-parent = <&its>;
+ /* define map for ICIDs 23-64 */
+ iommu-map = <23 &smmu 23 41>;
#address-cells = <3>;
#size-cells = <1>;
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
index 8cb78dd99672..90c0faf8579f 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi
@@ -148,6 +148,7 @@
#address-cells = <2>;
#size-cells = <2>;
ranges;
+ dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>;
clockgen: clocking@1300000 {
compatible = "fsl,ls2080a-clockgen";
@@ -321,6 +322,8 @@
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
msi-parent = <&its>;
+ iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */
+ dma-coherent;
#address-cells = <3>;
#size-cells = <1>;
@@ -424,6 +427,9 @@
compatible = "arm,mmu-500";
reg = <0 0x5000000 0 0x800000>;
#global-interrupts = <12>;
+ #iommu-cells = <1>;
+ stream-match-mask = <0x7C00>;
+ dma-coherent;
interrupts = <0 13 4>, /* global secure fault */
<0 14 4>, /* combined secure interrupt */
<0 15 4>, /* global non-secure fault */
@@ -466,7 +472,6 @@
<0 204 4>, <0 205 4>,
<0 206 4>, <0 207 4>,
<0 208 4>, <0 209 4>;
- mmu-masters = <&fsl_mc 0x300 0>;
};
dspi: dspi@2100000 {
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 072c51fb07d7..cf017c5bb5e7 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -712,7 +712,7 @@ static void __iommu_sync_single_for_cpu(struct device *dev,
if (is_device_dma_coherent(dev))
return;
- phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
__dma_unmap_area(phys_to_virt(phys), size, dir);
}
@@ -725,7 +725,7 @@ static void __iommu_sync_single_for_device(struct device *dev,
if (is_device_dma_coherent(dev))
return;
- phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
__dma_map_area(phys_to_virt(phys), size, dir);
}
@@ -738,9 +738,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
int prot = dma_info_to_prot(dir, coherent, attrs);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
- if (!iommu_dma_mapping_error(dev, dev_addr) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __iommu_sync_single_for_device(dev, dev_addr, size, dir);
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ !iommu_dma_mapping_error(dev, dev_addr))
+ __dma_map_area(page_address(page) + offset, size, dir);
return dev_addr;
}
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5f26962eff42..67ed72f31cc2 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -45,6 +45,8 @@ struct vcpu_data {
#ifdef CONFIG_IRQ_REMAP
+extern raw_spinlock_t irq_2_ir_lock;
+
extern bool irq_remapping_cap(enum irq_remap_cap cap);
extern void set_irq_remapping_broken(void);
extern int irq_remapping_prepare(void);
diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c
index 4552b06fe601..439ba5c23693 100644
--- a/drivers/bus/fsl-mc/fsl-mc-bus.c
+++ b/drivers/bus/fsl-mc/fsl-mc-bus.c
@@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
return 0;
}
+static int fsl_mc_dma_configure(struct device *dev)
+{
+ struct device *dma_dev = dev;
+
+ while (dev_is_fsl_mc(dma_dev))
+ dma_dev = dma_dev->parent;
+
+ return of_dma_configure(dev, dma_dev->of_node, 0);
+}
+
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
@@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = {
.name = "fsl-mc",
.match = fsl_mc_bus_match,
.uevent = fsl_mc_bus_uevent,
+ .dma_configure = fsl_mc_dma_configure,
.dev_groups = fsl_mc_dev_groups,
};
EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
@@ -621,6 +632,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
mc_dev->icid = parent_mc_dev->icid;
mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK;
mc_dev->dev.dma_mask = &mc_dev->dma_mask;
+ mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask;
dev_set_msi_domain(&mc_dev->dev,
dev_get_msi_domain(&parent_mc_dev->dev));
}
@@ -638,10 +650,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc,
goto error_cleanup_dev;
}
- /* Objects are coherent, unless 'no shareability' flag set. */
- if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY))
- arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true);
-
/*
* The device-specific probe callback will get invoked by device_add()
*/
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 83e6d993fca5..d9a25715650e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -186,6 +186,19 @@ config INTEL_IOMMU
and include PCI device scope covered by these DMA
remapping devices.
+config INTEL_IOMMU_DEBUGFS
+ bool "Export Intel IOMMU internals in Debugfs"
+ depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+ Expose Intel IOMMU internals in Debugfs.
+
+ This option is -NOT- intended for production environments, and should
+ only be enabled for debugging Intel IOMMU.
+
config INTEL_IOMMU_SVM
bool "Support for Shared Virtual Memory with Intel IOMMU"
depends on INTEL_IOMMU && X86
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index ab5eba6edf82..a158a68c8ea8 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index bee0dfb7b93b..1167ff0416cf 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap)
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
+ default:
+ break;
}
return false;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 3931c7de7c69..bb2cd29e1658 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1719,7 +1719,7 @@ static const struct attribute_group *amd_iommu_groups[] = {
NULL,
};
-static int iommu_init_pci(struct amd_iommu *iommu)
+static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc, low, high;
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 5059d09f3202..6947ccf26512 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1,18 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU API for ARM architected SMMUv3 implementations.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2015 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
@@ -567,7 +556,8 @@ struct arm_smmu_device {
int gerr_irq;
int combined_irq;
- atomic_t sync_nr;
+ u32 sync_nr;
+ u8 prev_cmd_opcode;
unsigned long ias; /* IPA */
unsigned long oas; /* PA */
@@ -611,6 +601,7 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
+ bool non_strict;
enum arm_smmu_domain_stage stage;
union {
@@ -708,7 +699,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
}
/*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * Wait for the SMMU to consume items. If sync is true, wait until the queue
* is empty. Otherwise, wait until there is at least one free slot.
*/
static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
@@ -901,6 +892,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+
while (queue_insert_raw(q, cmd) == -ENOSPC) {
if (queue_poll_cons(q, false, wfe))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
@@ -948,15 +941,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
.sync = {
- .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
.msiaddr = virt_to_phys(&smmu->sync_count),
},
};
- arm_smmu_cmdq_build_cmd(cmd, &ent);
-
spin_lock_irqsave(&smmu->cmdq.lock, flags);
- arm_smmu_cmdq_insert_cmd(smmu, cmd);
+
+ /* Piggy-back on the previous command if it's a SYNC */
+ if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
+ ent.sync.msidata = smmu->sync_nr;
+ } else {
+ ent.sync.msidata = ++smmu->sync_nr;
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+ arm_smmu_cmdq_insert_cmd(smmu, cmd);
+ }
+
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
@@ -1372,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
}
/* IO_PGTABLE API */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
-{
- arm_smmu_cmdq_issue_sync(smmu);
-}
-
static void arm_smmu_tlb_sync(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- __arm_smmu_tlb_sync(smmu_domain->smmu);
+
+ arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context(void *cookie)
@@ -1398,8 +1393,14 @@ static void arm_smmu_tlb_inv_context(void *cookie)
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
+ /*
+ * NOTE: when io-pgtable is in non-strict mode, we may get here with
+ * PTEs previously cleared by unmaps on the current CPU not yet visible
+ * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+ * to guarantee those are observed before the TLBI. Do be careful, 007.
+ */
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- __arm_smmu_tlb_sync(smmu);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
@@ -1624,6 +1625,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops)
return -ENOMEM;
@@ -1772,12 +1776,20 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->smmu)
+ arm_smmu_tlb_inv_context(smmu_domain);
+}
+
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
if (smmu)
- __arm_smmu_tlb_sync(smmu);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static phys_addr_t
@@ -1917,15 +1929,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
@@ -1935,26 +1959,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch(attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
out_unlock:
@@ -1999,7 +2034,7 @@ static struct iommu_ops arm_smmu_ops = {
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
@@ -2180,7 +2215,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
int ret;
- atomic_set(&smmu->sync_nr, 0);
ret = arm_smmu_init_queues(smmu);
if (ret)
return ret;
@@ -2353,8 +2387,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
irq = smmu->combined_irq;
if (irq) {
/*
- * Cavium ThunderX2 implementation doesn't not support unique
- * irq lines. Use single irq line for all the SMMUv3 interrupts.
+ * Cavium ThunderX2 implementation doesn't support unique irq
+ * lines. Use a single irq line for all the SMMUv3 interrupts.
*/
ret = devm_request_threaded_irq(smmu->dev, irq,
arm_smmu_combined_irq_handler,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index fd1b80ef9490..5a28ae892504 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -52,6 +52,7 @@
#include <linux/spinlock.h>
#include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
#include "io-pgtable.h"
#include "arm-smmu-regs.h"
@@ -246,6 +247,7 @@ struct arm_smmu_domain {
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
+ bool non_strict;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
struct iommu_domain domain;
@@ -447,7 +449,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie)
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
- writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+ /*
+ * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+ * cleared by the current CPU are visible to the SMMU before the TLBI.
+ */
+ writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
arm_smmu_tlb_sync_context(cookie);
}
@@ -457,7 +463,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie)
struct arm_smmu_device *smmu = smmu_domain->smmu;
void __iomem *base = ARM_SMMU_GR0(smmu);
- writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+ /* NOTE: see above */
+ writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
arm_smmu_tlb_sync_global(smmu);
}
@@ -469,6 +476,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
if (stage1) {
reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
@@ -510,6 +520,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
struct arm_smmu_domain *smmu_domain = cookie;
void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
@@ -863,6 +876,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
smmu_domain->smmu = smmu;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
@@ -1252,6 +1268,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->tlb_ops)
+ smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
@@ -1459,6 +1483,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev)
if (dev_is_pci(dev))
group = pci_device_group(dev);
+ else if (dev_is_fsl_mc(dev))
+ group = fsl_mc_device_group(dev);
else
group = generic_device_group(dev);
@@ -1470,15 +1496,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
@@ -1488,28 +1526,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
-
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
@@ -1562,7 +1610,7 @@ static struct iommu_ops arm_smmu_ops = {
.attach_dev = ar