diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-26 10:50:10 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-26 10:50:10 -0700 |
commit | d1f2b1710d92a80d60351503bbf41cdac95ed7a8 (patch) | |
tree | cc8247f2917c38b476b8777c29e3c9a154445d9e | |
parent | 18d0eae30e6a4f8644d589243d7ac1d70d29203d (diff) | |
parent | 2f2fbfb71ecc221352d84ae6430b42031ae5b654 (diff) |
Merge tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:
- Debugfs support for the Intel VT-d driver.
When enabled, it now also exposes some of its internal data
structures to user-space for debugging purposes.
- ARM-SMMU driver now uses the generic deferred flushing and fast-path
iova allocation code.
This is expected to be a major performance improvement, as this
allocation path scales a lot better.
- Support for r8a7744 in the Renesas iommu driver
- Couple of minor fixes and improvements all over the place
* tag 'iommu-updates-v4.20' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (39 commits)
iommu/arm-smmu-v3: Remove unnecessary wrapper function
iommu/arm-smmu-v3: Add SPDX header
iommu/amd: Add default branch in amd_iommu_capable()
dt-bindings: iommu: ipmmu-vmsa: Add r8a7744 support
iommu/amd: Move iommu_init_pci() to .init section
iommu/arm-smmu: Support non-strict mode
iommu/io-pgtable-arm-v7s: Add support for non-strict mode
iommu/arm-smmu-v3: Add support for non-strict mode
iommu/io-pgtable-arm: Add support for non-strict mode
iommu: Add "iommu.strict" command line option
iommu/dma: Add support for non-strict mode
iommu/arm-smmu: Ensure that page-table updates are visible before TLBI
iommu/arm-smmu-v3: Implement flush_iotlb_all hook
iommu/arm-smmu-v3: Avoid back-to-back CMD_SYNC operations
iommu/arm-smmu-v3: Fix unexpected CMD_SYNC timeout
iommu/io-pgtable-arm: Fix race handling in split_blk_unmap()
iommu/arm-smmu-v3: Fix a couple of minor comment typos
iommu: Fix a typo
iommu: Remove .domain_{get,set}_windows
iommu: Tidy up window attributes
...
34 files changed, 986 insertions, 356 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 8022d902e770..5b5f1ba76bba 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1759,6 +1759,18 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.strict= [ARM64] Configure TLB invalidation behaviour + Format: { "0" | "1" } + 0 - Lazy mode. + Request that DMA unmap operations use deferred + invalidation of hardware TLBs, for increased + throughput at the cost of reduced device isolation. + Will fall back to strict mode if not supported by + the relevant IOMMU driver. + 1 - Strict mode (default). + DMA unmap operations invalidate IOMMU hardware TLBs + synchronously. + iommu.passthrough= [ARM64] Configure DMA to bypass the IOMMU by default. Format: { "0" | "1" } diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt index c6e2d855fe13..377ee639d103 100644 --- a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt +++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt @@ -12,6 +12,7 @@ Required Properties: - "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU. - "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU. + - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU. - "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU. - "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU. - "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU. diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt index 6611a7c2053a..01fdc33a41d0 100644 --- a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt +++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt @@ -9,6 +9,25 @@ blocks that can be used to create functional hardware objects/devices such as network interfaces, crypto accelerator instances, L2 switches, etc. +For an overview of the DPAA2 architecture and fsl-mc bus see: +Documentation/networking/dpaa2/overview.rst + +As described in the above overview, all DPAA2 objects in a DPRC share the +same hardware "isolation context" and a 10-bit value called an ICID +(isolation context id) is expressed by the hardware to identify +the requester. + +The generic 'iommus' property is insufficient to describe the relationship +between ICIDs and IOMMUs, so an iommu-map property is used to define +the set of possible ICIDs under a root DPRC and how they map to +an IOMMU. + +For generic IOMMU bindings, see +Documentation/devicetree/bindings/iommu/iommu.txt. + +For arm-smmu binding, see: +Documentation/devicetree/bindings/iommu/arm,smmu.txt. + Required properties: - compatible @@ -88,14 +107,34 @@ Sub-nodes: Value type: <phandle> Definition: Specifies the phandle to the PHY device node associated with the this dpmac. +Optional properties: + +- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier + data. + + The property is an arbitrary number of tuples of + (icid-base,iommu,iommu-base,length). + + Any ICID i in the interval [icid-base, icid-base + length) is + associated with the listed IOMMU, with the iommu-specifier + (i - icid-base + iommu-base). Example: + smmu: iommu@5000000 { + compatible = "arm,mmu-500"; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + ... + }; + fsl_mc: fsl-mc@80c000000 { compatible = "fsl,qoriq-mc"; reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + /* define map for ICIDs 23-64 */ + iommu-map = <23 &smmu 23 41>; #address-cells = <3>; #size-cells = <1>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 8cb78dd99672..90c0faf8579f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -148,6 +148,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; + dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>; clockgen: clocking@1300000 { compatible = "fsl,ls2080a-clockgen"; @@ -321,6 +322,8 @@ reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */ msi-parent = <&its>; + iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */ + dma-coherent; #address-cells = <3>; #size-cells = <1>; @@ -424,6 +427,9 @@ compatible = "arm,mmu-500"; reg = <0 0x5000000 0 0x800000>; #global-interrupts = <12>; + #iommu-cells = <1>; + stream-match-mask = <0x7C00>; + dma-coherent; interrupts = <0 13 4>, /* global secure fault */ <0 14 4>, /* combined secure interrupt */ <0 15 4>, /* global non-secure fault */ @@ -466,7 +472,6 @@ <0 204 4>, <0 205 4>, <0 206 4>, <0 207 4>, <0 208 4>, <0 209 4>; - mmu-masters = <&fsl_mc 0x300 0>; }; dspi: dspi@2100000 { diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 072c51fb07d7..cf017c5bb5e7 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -712,7 +712,7 @@ static void __iommu_sync_single_for_cpu(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_unmap_area(phys_to_virt(phys), size, dir); } @@ -725,7 +725,7 @@ static void __iommu_sync_single_for_device(struct device *dev, if (is_device_dma_coherent(dev)) return; - phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); + phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr); __dma_map_area(phys_to_virt(phys), size, dir); } @@ -738,9 +738,9 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, int prot = dma_info_to_prot(dir, coherent, attrs); dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot); - if (!iommu_dma_mapping_error(dev, dev_addr) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __iommu_sync_single_for_device(dev, dev_addr, size, dir); + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + !iommu_dma_mapping_error(dev, dev_addr)) + __dma_map_area(page_address(page) + offset, size, dir); return dev_addr; } diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5f26962eff42..67ed72f31cc2 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,8 @@ struct vcpu_data { #ifdef CONFIG_IRQ_REMAP +extern raw_spinlock_t irq_2_ir_lock; + extern bool irq_remapping_cap(enum irq_remap_cap cap); extern void set_irq_remapping_broken(void); extern int irq_remapping_prepare(void); diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 4552b06fe601..439ba5c23693 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -127,6 +127,16 @@ static int fsl_mc_bus_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int fsl_mc_dma_configure(struct device *dev) +{ + struct device *dma_dev = dev; + + while (dev_is_fsl_mc(dma_dev)) + dma_dev = dma_dev->parent; + + return of_dma_configure(dev, dma_dev->of_node, 0); +} + static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -148,6 +158,7 @@ struct bus_type fsl_mc_bus_type = { .name = "fsl-mc", .match = fsl_mc_bus_match, .uevent = fsl_mc_bus_uevent, + .dma_configure = fsl_mc_dma_configure, .dev_groups = fsl_mc_dev_groups, }; EXPORT_SYMBOL_GPL(fsl_mc_bus_type); @@ -621,6 +632,7 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, mc_dev->icid = parent_mc_dev->icid; mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK; mc_dev->dev.dma_mask = &mc_dev->dma_mask; + mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask; dev_set_msi_domain(&mc_dev->dev, dev_get_msi_domain(&parent_mc_dev->dev)); } @@ -638,10 +650,6 @@ int fsl_mc_device_add(struct fsl_mc_obj_desc *obj_desc, goto error_cleanup_dev; } - /* Objects are coherent, unless 'no shareability' flag set. */ - if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY)) - arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true); - /* * The device-specific probe callback will get invoked by device_add() */ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 83e6d993fca5..d9a25715650e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -186,6 +186,19 @@ config INTEL_IOMMU and include PCI device scope covered by these DMA remapping devices. +config INTEL_IOMMU_DEBUGFS + bool "Export Intel IOMMU internals in Debugfs" + depends on INTEL_IOMMU && IOMMU_DEBUGFS + help + !!!WARNING!!! + + DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!! + + Expose Intel IOMMU internals in Debugfs. + + This option is -NOT- intended for production environments, and should + only be enabled for debugging Intel IOMMU. + config INTEL_IOMMU_SVM bool "Support for Shared Virtual Memory with Intel IOMMU" depends on INTEL_IOMMU && X86 diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index ab5eba6edf82..a158a68c8ea8 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_ARM_SMMU) += arm-smmu.o obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o obj-$(CONFIG_DMAR_TABLE) += dmar.o obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o +obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index bee0dfb7b93b..1167ff0416cf 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3083,6 +3083,8 @@ static bool amd_iommu_capable(enum iommu_cap cap) return (irq_remapping_enabled == 1); case IOMMU_CAP_NOEXEC: return false; + default: + break; } return false; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3931c7de7c69..bb2cd29e1658 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1719,7 +1719,7 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; -static int iommu_init_pci(struct amd_iommu *iommu) +static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; u32 range, misc, low, high; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 5059d09f3202..6947ccf26512 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1,18 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IOMMU API for ARM architected SMMUv3 implementations. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * * Copyright (C) 2015 ARM Limited * * Author: Will Deacon <will.deacon@arm.com> @@ -567,7 +556,8 @@ struct arm_smmu_device { int gerr_irq; int combined_irq; - atomic_t sync_nr; + u32 sync_nr; + u8 prev_cmd_opcode; unsigned long ias; /* IPA */ unsigned long oas; /* PA */ @@ -611,6 +601,7 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; + bool non_strict; enum arm_smmu_domain_stage stage; union { @@ -708,7 +699,7 @@ static void queue_inc_prod(struct arm_smmu_queue *q) } /* - * Wait for the SMMU to consume items. If drain is true, wait until the queue + * Wait for the SMMU to consume items. If sync is true, wait until the queue * is empty. Otherwise, wait until there is at least one free slot. */ static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe) @@ -901,6 +892,8 @@ static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd) struct arm_smmu_queue *q = &smmu->cmdq.q; bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV); + smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]); + while (queue_insert_raw(q, cmd) == -ENOSPC) { if (queue_poll_cons(q, false, wfe)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); @@ -948,15 +941,21 @@ static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu) struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, .sync = { - .msidata = atomic_inc_return_relaxed(&smmu->sync_nr), .msiaddr = virt_to_phys(&smmu->sync_count), }, }; - arm_smmu_cmdq_build_cmd(cmd, &ent); - spin_lock_irqsave(&smmu->cmdq.lock, flags); - arm_smmu_cmdq_insert_cmd(smmu, cmd); + + /* Piggy-back on the previous command if it's a SYNC */ + if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) { + ent.sync.msidata = smmu->sync_nr; + } else { + ent.sync.msidata = ++smmu->sync_nr; + arm_smmu_cmdq_build_cmd(cmd, &ent); + arm_smmu_cmdq_insert_cmd(smmu, cmd); + } + spin_unlock_irqrestore(&smmu->cmdq.lock, flags); return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata); @@ -1372,15 +1371,11 @@ static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev) } /* IO_PGTABLE API */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) -{ - arm_smmu_cmdq_issue_sync(smmu); -} - static void arm_smmu_tlb_sync(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + + arm_smmu_cmdq_issue_sync(smmu_domain->smmu); } static void arm_smmu_tlb_inv_context(void *cookie) @@ -1398,8 +1393,14 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; } + /* + * NOTE: when io-pgtable is in non-strict mode, we may get here with + * PTEs previously cleared by unmaps on the current CPU not yet visible + * to the SMMU. We are relying on the DSB implicit in queue_inc_prod() + * to guarantee those are observed before the TLBI. Do be careful, 007. + */ arm_smmu_cmdq_issue_cmd(smmu, &cmd); - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, @@ -1624,6 +1625,9 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) if (smmu->features & ARM_SMMU_FEAT_COHERENCY) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) return -ENOMEM; @@ -1772,12 +1776,20 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->smmu) + arm_smmu_tlb_inv_context(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; if (smmu) - __arm_smmu_tlb_sync(smmu); + arm_smmu_cmdq_issue_sync(smmu); } static phys_addr_t @@ -1917,15 +1929,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1935,26 +1959,37 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch (domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch(attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } out_unlock: @@ -1999,7 +2034,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = arm_smmu_attach_dev, .map = arm_smmu_map, .unmap = arm_smmu_unmap, - .flush_iotlb_all = arm_smmu_iotlb_sync, + .flush_iotlb_all = arm_smmu_flush_iotlb_all, .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, @@ -2180,7 +2215,6 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) { int ret; - atomic_set(&smmu->sync_nr, 0); ret = arm_smmu_init_queues(smmu); if (ret) return ret; @@ -2353,8 +2387,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) irq = smmu->combined_irq; if (irq) { /* - * Cavium ThunderX2 implementation doesn't not support unique - * irq lines. Use single irq line for all the SMMUv3 interrupts. + * Cavium ThunderX2 implementation doesn't support unique irq + * lines. Use a single irq line for all the SMMUv3 interrupts. */ ret = devm_request_threaded_irq(smmu->dev, irq, arm_smmu_combined_irq_handler, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index fd1b80ef9490..5a28ae892504 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -52,6 +52,7 @@ #include <linux/spinlock.h> #include <linux/amba/bus.h> +#include <linux/fsl/mc.h> #include "io-pgtable.h" #include "arm-smmu-regs.h" @@ -246,6 +247,7 @@ struct arm_smmu_domain { const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; + bool non_strict; struct mutex init_mutex; /* Protects smmu pointer */ spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */ struct iommu_domain domain; @@ -447,7 +449,11 @@ static void arm_smmu_tlb_inv_context_s1(void *cookie) struct arm_smmu_cfg *cfg = &smmu_domain->cfg; void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); - writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + /* + * NOTE: this is not a relaxed write; it needs to guarantee that PTEs + * cleared by the current CPU are visible to the SMMU before the TLBI. + */ + writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); arm_smmu_tlb_sync_context(cookie); } @@ -457,7 +463,8 @@ static void arm_smmu_tlb_inv_context_s2(void *cookie) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *base = ARM_SMMU_GR0(smmu); - writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); + /* NOTE: see above */ + writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); arm_smmu_tlb_sync_global(smmu); } @@ -469,6 +476,9 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + if (stage1) { reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; @@ -510,6 +520,9 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, struct arm_smmu_domain *smmu_domain = cookie; void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) + wmb(); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } @@ -863,6 +876,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA; + if (smmu_domain->non_strict) + pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT; + smmu_domain->smmu = smmu; pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain); if (!pgtbl_ops) { @@ -1252,6 +1268,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); +} + static void arm_smmu_iotlb_sync(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -1459,6 +1483,8 @@ static struct iommu_group *arm_smmu_device_group(struct device *dev) if (dev_is_pci(dev)) group = pci_device_group(dev); + else if (dev_is_fsl_mc(dev)) + group = fsl_mc_device_group(dev); else group = generic_device_group(dev); @@ -1470,15 +1496,27 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - - switch (attr) { - case DOMAIN_ATTR_NESTING: - *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); - return 0; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); + return 0; + default: + return -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + *(int *)data = smmu_domain->non_strict; + return 0; + default: + return -ENODEV; + } + break; default: - return -ENODEV; + return -EINVAL; } } @@ -1488,28 +1526,38 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - if (domain->type != IOMMU_DOMAIN_UNMANAGED) - return -EINVAL; - mutex_lock(&smmu_domain->init_mutex); - switch (attr) { - case DOMAIN_ATTR_NESTING: - if (smmu_domain->smmu) { - ret = -EPERM; - goto out_unlock; + switch(domain->type) { + case IOMMU_DOMAIN_UNMANAGED: + switch (attr) { + case DOMAIN_ATTR_NESTING: + if (smmu_domain->smmu) { + ret = -EPERM; + goto out_unlock; + } + + if (*(int *)data) + smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; + else + smmu_domain->stage = ARM_SMMU_DOMAIN_S1; + break; + default: + ret = -ENODEV; + } + break; + case IOMMU_DOMAIN_DMA: + switch (attr) { + case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: + smmu_domain->non_strict = *(int *)data; + break; + default: + ret = -ENODEV; } - - if (*(int *)data) - smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED; - else - smmu_domain->stage = ARM_SMMU_DOMAIN_S1; - break; default: - ret = -ENODEV; + ret = -EINVAL; } - out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; @@ -1562,7 +1610,7 @@ static struct iommu_ops arm_smmu_ops = { .attach_dev = ar |