From 8e517e762a826d16451fb6ffb0a8722e4265582e Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Thu, 6 Jul 2017 15:55:48 +0100
Subject: iommu/arm-smmu: Reintroduce locking around TLB sync operations

Commit 523d7423e21b ("iommu/arm-smmu: Remove io-pgtable spinlock")
removed the locking used to serialise map/unmap calls into the
io-pgtable code from the ARM SMMU driver. This is good for performance,
but opens us up to a nasty race with TLB syncs because the TLB sync
register is shared within a context bank (or even globally for stage-2
on SMMUv1).

There are two cases to consider:

  1. A CPU can be spinning on the completion of a TLB sync, take an
     interrupt which issues a subsequent TLB sync, and then report a
     timeout on return from the interrupt.

  2. A CPU can be spinning on the completion of a TLB sync, but other
     CPUs can continuously issue additional TLB syncs in such a way
     that the backoff logic reports a timeout.

Rather than fix this by spinning for completion of prior TLB syncs
before issuing a new one (which may suffer from fairness issues on
large systems), instead reintroduce locking around TLB sync operations
in the ARM SMMU driver.

Fixes: 523d7423e21b ("iommu/arm-smmu: Remove io-pgtable spinlock")
Cc: Robin Murphy
Reported-by: Ray Jui
Tested-by: Ray Jui
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index bc89b4d6c043..98ff462b5992 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -400,6 +400,8 @@ struct arm_smmu_device {
 
 	u32				cavium_id_base; /* Specific to Cavium */
 
+	spinlock_t			global_sync_lock;
+
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -436,7 +438,7 @@ struct arm_smmu_domain {
 	struct arm_smmu_cfg		cfg;
 	enum arm_smmu_domain_stage	stage;
 	struct mutex			init_mutex; /* Protects smmu pointer */
-	spinlock_t			cb_lock; /* Serialises ATS1* ops */
+	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
 	struct iommu_domain		domain;
 };
@@ -602,9 +604,12 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
 static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
 {
 	void __iomem *base = ARM_SMMU_GR0(smmu);
+	unsigned long flags;
 
+	spin_lock_irqsave(&smmu->global_sync_lock, flags);
 	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
 			    base + ARM_SMMU_GR0_sTLBGSTATUS);
+	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
 }
 
 static void arm_smmu_tlb_sync_context(void *cookie)
@@ -612,9 +617,12 @@ static void arm_smmu_tlb_sync_context(void *cookie)
 	struct arm_smmu_domain *smmu_domain = cookie;
 	struct arm_smmu_device *smmu = smmu_domain->smmu;
 	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
+	unsigned long flags;
 
+	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
 	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
 			    base + ARM_SMMU_CB_TLBSTATUS);
+	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
 }
 
 static void arm_smmu_tlb_sync_vmid(void *cookie)
@@ -1925,6 +1933,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
 		smmu->num_mapping_groups = size;
 
 	mutex_init(&smmu->stream_map_mutex);
+	spin_lock_init(&smmu->global_sync_lock);
 
 	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
 		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
-- 
cgit v1.2.3
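For illustration, the shape of this fix is the classic one: serialise both
the kick and the poll of a completion register that several CPUs (and
interrupt handlers) share. A minimal sketch of the pattern follows; every
toy_* name is hypothetical, not taken from the driver:

#include <linux/delay.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/spinlock.h>

#define TOY_STATUS_ACTIVE	(1 << 0)
#define TOY_SPIN_COUNT		10
#define TOY_LOOP_TIMEOUT	1000000

struct toy_smmu {
	struct device	*dev;
	void __iomem	*sync_reg;	/* write kicks off a TLB sync */
	void __iomem	*status_reg;	/* ACTIVE clears on completion */
	spinlock_t	sync_lock;	/* serialises kick+poll pairs */
};

static void toy_tlb_sync(struct toy_smmu *smmu)
{
	unsigned int spin_cnt, delay;
	unsigned long flags;

	/*
	 * Locking with irqs masked covers both races from the changelog:
	 * a local interrupt handler cannot issue a nested sync while we
	 * poll (case 1), and other CPUs cannot retrigger the shared
	 * register underneath us (case 2).
	 */
	spin_lock_irqsave(&smmu->sync_lock, flags);
	writel_relaxed(0, smmu->sync_reg);
	for (delay = 1; delay < TOY_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TOY_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(smmu->status_reg) & TOY_STATUS_ACTIVE))
				goto out;
			cpu_relax();
		}
		udelay(delay);	/* exponential backoff between spin bursts */
	}
	dev_err_ratelimited(smmu->dev, "TLB sync timed out\n");
out:
	spin_unlock_irqrestore(&smmu->sync_lock, flags);
}

The cost is serialisation, but only around the sync itself: map/unmap stays
lock-free, and a spinlock queues its waiters instead of letting a busy CPU
starve the pollers, which is the fairness concern the changelog raises about
plain spin-waiting.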
From 98a8f63e56a0bdcf1d0af8d840d011ab90386684 Mon Sep 17 00:00:00 2001
From: Robin Murphy
Date: Thu, 6 Jul 2017 17:55:30 +0100
Subject: iommu/mtk: Avoid redundant TLB syncs locally

Under certain circumstances, the io-pgtable code may end up issuing two
TLB sync operations without any intervening invalidations. This goes
badly for the M4U hardware, since it means the second sync ends up
polling for a non-existent operation to finish, and as a result times
out and warns. The io_pgtable_tlb_* helpers implement a high-level
optimisation to avoid issuing the second sync at all in such cases, but
in order to work correctly that requires all pagetable operations to be
serialised under a lock, and is thus no longer applicable to all
io-pgtable users.

Since we're the only user actually relying on this flag for
correctness, let's reimplement it locally to avoid the headache of
trying to make the high-level version concurrency-safe for other users.

CC: Yong Wu
CC: Matthias Brugger
Tested-by: Yong Wu
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/mtk_iommu.c | 6 ++++++
 drivers/iommu/mtk_iommu.h | 1 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 5d14cd15198d..91c6d367ab35 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -129,6 +129,7 @@ static void mtk_iommu_tlb_add_flush_nosync(unsigned long iova, size_t size,
 	writel_relaxed(iova, data->base + REG_MMU_INVLD_START_A);
 	writel_relaxed(iova + size - 1, data->base + REG_MMU_INVLD_END_A);
 	writel_relaxed(F_MMU_INV_RANGE, data->base + REG_MMU_INVALIDATE);
+	data->tlb_flush_active = true;
 }
 
 static void mtk_iommu_tlb_sync(void *cookie)
@@ -137,6 +138,10 @@ static void mtk_iommu_tlb_sync(void *cookie)
 	int ret;
 	u32 tmp;
 
+	/* Avoid timing out if there's nothing to wait for */
+	if (!data->tlb_flush_active)
+		return;
+
 	ret = readl_poll_timeout_atomic(data->base + REG_MMU_CPE_DONE,
 					tmp, tmp != 0, 10, 100000);
 	if (ret) {
@@ -146,6 +151,7 @@ static void mtk_iommu_tlb_sync(void *cookie)
 	}
 	/* Clear the CPE status */
 	writel_relaxed(0, data->base + REG_MMU_CPE_DONE);
+	data->tlb_flush_active = false;
 }
 
 static const struct iommu_gather_ops mtk_iommu_gather_ops = {
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 2a28eadeea0e..c06cc91b5d9a 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -47,6 +47,7 @@ struct mtk_iommu_data {
 	struct iommu_group		*m4u_group;
 	struct mtk_smi_iommu		smi_imu;      /* SMI larb iommu info */
 	bool				enable_4GB;
+	bool				tlb_flush_active;
 
 	struct iommu_device		iommu;
 };
-- 
cgit v1.2.3
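Distilled from the patch above, the private pending-flag idiom looks like
this. The toy_* names and register offsets are placeholders, not the mtk
code; the point is simply that the sync callback becomes a no-op unless an
invalidation is actually in flight:

#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/printk.h>

#define TOY_REG_INV_RANGE	0x020	/* illustrative offsets only */
#define TOY_REG_INV_START	0x024
#define TOY_REG_INV_END		0x028
#define TOY_REG_CPE_DONE	0x12c

struct toy_m4u {
	void __iomem	*base;
	bool		flush_pending;	/* set by flush, cleared by sync */
};

static void toy_tlb_add_flush(struct toy_m4u *m4u, unsigned long iova,
			      size_t size)
{
	writel_relaxed(iova, m4u->base + TOY_REG_INV_START);
	writel_relaxed(iova + size - 1, m4u->base + TOY_REG_INV_END);
	writel_relaxed(0x1, m4u->base + TOY_REG_INV_RANGE);
	m4u->flush_pending = true;	/* now there is something to wait for */
}

static void toy_tlb_sync(struct toy_m4u *m4u)
{
	u32 tmp;

	/*
	 * A sync with no flush in flight would poll a done-bit that is
	 * never going to be raised, time out, and warn for nothing.
	 */
	if (!m4u->flush_pending)
		return;

	if (readl_poll_timeout_atomic(m4u->base + TOY_REG_CPE_DONE, tmp,
				      tmp != 0, 10, 100000))
		pr_warn("toy_m4u: partial TLB flush timed out\n");

	writel_relaxed(0, m4u->base + TOY_REG_CPE_DONE);	/* ack */
	m4u->flush_pending = false;
}

The implicit assumption, which holds for this driver but which the generic
helpers could no longer make for every user, is that flush and sync callers
are already serialised with respect to each other.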
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/io-pgtable.h | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 524263a7ae6f..a3e667077b14 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -158,14 +158,12 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops);
  * @fmt:    The page table format.
  * @cookie: An opaque token provided by the IOMMU driver and passed back to
  *          any callback routines.
- * @tlb_sync_pending: Private flag for optimising out redundant syncs.
  * @cfg:    A copy of the page table configuration.
  * @ops:    The page table operations in use for this set of page tables.
  */
 struct io_pgtable {
 	enum io_pgtable_fmt	fmt;
 	void			*cookie;
-	bool			tlb_sync_pending;
 	struct io_pgtable_cfg	cfg;
 	struct io_pgtable_ops	ops;
 };
@@ -175,22 +173,17 @@ struct io_pgtable {
 static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
 {
 	iop->cfg.tlb->tlb_flush_all(iop->cookie);
-	iop->tlb_sync_pending = true;
 }
 
 static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop,
 		unsigned long iova, size_t size, size_t granule, bool leaf)
 {
 	iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie);
-	iop->tlb_sync_pending = true;
 }
 
 static inline void io_pgtable_tlb_sync(struct io_pgtable *iop)
 {
-	if (iop->tlb_sync_pending) {
-		iop->cfg.tlb->tlb_sync(iop->cookie);
-		iop->tlb_sync_pending = false;
-	}
+	iop->cfg.tlb->tlb_sync(iop->cookie);
 }
 
 /**
-- 
cgit v1.2.3

From c54451a5e2415948274bc71f469e5349469addcc Mon Sep 17 00:00:00 2001
From: Vivek Gautam
Date: Thu, 6 Jul 2017 15:07:00 +0530
Subject: iommu/arm-smmu: Fix the error path in arm_smmu_add_device

fwspec->iommu_priv is available only after the arm_smmu_master_cfg
instance has been allocated. We shouldn't free it before that. Also,
it's logical to free the master cfg itself without checking for fwspec.

Signed-off-by: Vivek Gautam
[will: remove redundant assignment to fwspec]
Signed-off-by: Will Deacon
---
 drivers/iommu/arm-smmu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 98ff462b5992..b97188acc4f1 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -1519,7 +1519,6 @@ static int arm_smmu_add_device(struct device *dev)
 
 	if (using_legacy_binding) {
 		ret = arm_smmu_register_legacy_master(dev, &smmu);
-		fwspec = dev->iommu_fwspec;
 		if (ret)
 			goto out_free;
 	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
@@ -1558,15 +1557,15 @@ static int arm_smmu_add_device(struct device *dev)
 
 	ret = arm_smmu_master_alloc_smes(dev);
 	if (ret)
-		goto out_free;
+		goto out_cfg_free;
 
 	iommu_device_link(&smmu->iommu, dev);
 
 	return 0;
 
+out_cfg_free:
+	kfree(cfg);
 out_free:
-	if (fwspec)
-		kfree(fwspec->iommu_priv);
 	iommu_fwspec_free(dev);
 	return ret;
 }
-- 
cgit v1.2.3
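The convention this fix restores deserves spelling out, since it is the
standard kernel error-handling shape: one goto label per teardown step,
placed so that each failure point jumps past the cleanup of anything it
never allocated, and cleanups run in reverse order of setup. A generic
sketch with hypothetical toy_* names, not the driver's actual code:

#include <linux/device.h>
#include <linux/slab.h>

struct toy_cfg { int smendx; };

int toy_register_master(struct device *dev);		/* hypothetical */
int toy_alloc_smes(struct device *dev, struct toy_cfg *cfg);
void toy_fwspec_free(struct device *dev);

static int toy_add_device(struct device *dev)
{
	struct toy_cfg *cfg;
	int ret;

	ret = toy_register_master(dev);	/* nothing else allocated yet */
	if (ret)
		goto out_free;		/* so don't touch cfg here */

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg) {
		ret = -ENOMEM;
		goto out_free;
	}

	ret = toy_alloc_smes(dev, cfg);
	if (ret)
		goto out_cfg_free;	/* cfg is valid from here on */

	return 0;

out_cfg_free:
	kfree(cfg);			/* reverse order of setup */
out_free:
	toy_fwspec_free(dev);
	return ret;
}

The original bug was exactly a violation of this layering: the common label
freed fwspec->iommu_priv, which paths reaching it before the master cfg
allocation had never set up.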
Signed-off-by: Robin Murphy
Signed-off-by: Will Deacon
---
 drivers/iommu/io-pgtable-arm-v7s.c | 6 ++++++
 drivers/iommu/io-pgtable-arm.c     | 7 +++++++
 2 files changed, 13 insertions(+)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index af330f513653..d665d0dc16e8 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -479,6 +479,9 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
 	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
 		return 0;
 
+	if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
+		return -ERANGE;
+
 	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
 	/*
 	 * Synchronise all PTE updates for the new mapping before there's
@@ -659,6 +662,9 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
 	size_t unmapped;
 
+	if (WARN_ON(upper_32_bits(iova)))
+		return 0;
+
 	unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd);
 	if (unmapped)
 		io_pgtable_tlb_sync(&data->iop);
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index b182039862c5..e8018a308868 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -452,6 +452,10 @@ static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
 	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
 		return 0;
 
+	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias) ||
+		    paddr >= (1ULL << data->iop.cfg.oas)))
+		return -ERANGE;
+
 	prot = arm_lpae_prot_to_pte(data, iommu_prot);
 	ret = __arm_lpae_map(data, iova, paddr, size, prot, lvl, ptep);
 	/*
@@ -610,6 +614,9 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
 	arm_lpae_iopte *ptep = data->pgd;
 	int lvl = ARM_LPAE_START_LVL(data);
 
+	if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias)))
+		return 0;
+
 	unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep);
 	if (unmapped)
 		io_pgtable_tlb_sync(&data->iop);
-- 
cgit v1.2.3

From efe6f241602cb61466895f6816b8ea6b90f04d4e Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit
Date: Wed, 5 Jul 2017 21:29:59 -0500
Subject: iommu/amd: Enable ga_log_intr when enabling guest_mode

The IRTE[GALogIntr] bit should be set when enabling guest_mode, which
enables the IOMMU to generate an entry in the GALog when IRTE[IsRun] is
not set, and to send an interrupt to notify the IOMMU driver.

Signed-off-by: Suravee Suthikulpanit
Cc: Joerg Roedel
Cc: stable@vger.kernel.org # v4.9+
Fixes: d98de49a53e48 ('iommu/amd: Enable vAPIC interrupt remapping mode by default')
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd_iommu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 688e77576e5a..354cbd6392cd 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -4452,6 +4452,7 @@ static int amd_ir_set_vcpu_affinity(struct irq_data *data, void *vcpu_info)
 
 		/* Setting */
 		irte->hi.fields.ga_root_ptr = (pi_data->base >> 12);
 		irte->hi.fields.vector = vcpu_pi_info->vector;
+		irte->lo.fields_vapic.ga_log_intr = 1;
 		irte->lo.fields_vapic.guest_mode = 1;
 		irte->lo.fields_vapic.ga_tag = pi_data->ga_tag;
-- 
cgit v1.2.3
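As a reading aid, here is the guest-mode IRTE programming gathered into one
hypothetical helper. The struct irte_ga field names are the ones visible in
the diff above; the helper itself is not part of the driver:

/*
 * Hypothetical helper, illustrative only. Without ga_log_intr set, a
 * wakeup aimed at a vCPU whose IRTE[IsRun] is clear still gets its
 * GALog entry written, but no interrupt tells the IOMMU driver to go
 * and process it, so the event is effectively lost.
 */
static void toy_irte_enable_guest_mode(struct irte_ga *irte, u64 ga_root_pa,
				       u8 vector, u32 ga_tag)
{
	irte->hi.fields.ga_root_ptr       = ga_root_pa >> 12;	/* 4K-aligned */
	irte->hi.fields.vector            = vector;
	irte->lo.fields_vapic.ga_log_intr = 1;	/* the bit this patch adds */
	irte->lo.fields_vapic.guest_mode  = 1;
	irte->lo.fields_vapic.ga_tag      = ga_tag;
}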
From 74ddda71f44c84af62f736a77fb9fcebe5bb436a Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Wed, 26 Jul 2017 14:17:55 +0200
Subject: iommu/amd: Fix schedule-while-atomic BUG in initialization code

The register_syscore_ops() function takes a mutex and might sleep. In
the IOMMU initialization code it is invoked during irq-remapping setup
already, where irqs are disabled.

This causes a schedule-while-atomic bug:

  BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
  in_atomic(): 0, irqs_disabled(): 1, pid: 1, name: swapper/0
  no locks held by swapper/0/1.
  irq event stamp: 304
  hardirqs last enabled at (303): [] _raw_spin_unlock_irqrestore+0x36/0x60
  hardirqs last disabled at (304): [] enable_IR_x2apic+0x79/0x196
  softirqs last enabled at (36): [] __do_softirq+0x35f/0x4ec
  softirqs last disabled at (31): [] irq_exit+0x105/0x120
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.13.0-rc2.1.el7a.test.x86_64.debug #1
  Hardware name: PowerEdge C6145 /040N24, BIOS 3.5.0 10/28/2014
  Call Trace:
   dump_stack+0x85/0xca
   ___might_sleep+0x22a/0x260
   __might_sleep+0x4a/0x80
   __mutex_lock+0x58/0x960
   ? iommu_completion_wait.part.17+0xb5/0x160
   ? register_syscore_ops+0x1d/0x70
   ? iommu_flush_all_caches+0x120/0x150
   mutex_lock_nested+0x1b/0x20
   register_syscore_ops+0x1d/0x70
   state_next+0x119/0x910
   iommu_go_to_state+0x29/0x30
   amd_iommu_enable+0x13/0x23

Fix it by moving the register_syscore_ops() call to the next
initialization step, which runs with irqs enabled.

Reported-by: Artem Savkov
Tested-by: Artem Savkov
Acked-by: Thomas Gleixner
Fixes: 2c0ae1720c09 ('iommu/amd: Convert iommu initialization to state machine')
Signed-off-by: Joerg Roedel
---
 drivers/iommu/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5cc597b383c7..372303700566 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -2440,11 +2440,11 @@ static int __init state_next(void)
 		break;
 	case IOMMU_ACPI_FINISHED:
 		early_enable_iommus();
-		register_syscore_ops(&amd_iommu_syscore_ops);
 		x86_platform.iommu_shutdown = disable_iommus;
 		init_state = IOMMU_ENABLED;
 		break;
 	case IOMMU_ENABLED:
+		register_syscore_ops(&amd_iommu_syscore_ops);
 		ret = amd_iommu_init_pci();
 		init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT;
 		enable_iommus_v2();
-- 
cgit v1.2.3
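The ordering constraint behind this fix, reduced to a sketch: anything that
can sleep must move out of the stage entered with interrupts disabled and
into a later stage that runs in normal process context. The toy_* states and
helpers below are hypothetical; register_syscore_ops() is the real API whose
internal mutex triggered the BUG:

#include <linux/syscore_ops.h>

enum toy_init_state {
	TOY_ACPI_FINISHED,
	TOY_ENABLED,
	TOY_PCI_INIT,
	TOY_INIT_ERROR,
};

static enum toy_init_state init_state;
static struct syscore_ops toy_syscore_ops;

void toy_early_enable_hw(void);	/* atomic-safe hardware poking only */
int toy_init_pci(void);		/* may sleep */

static int __init toy_state_next(void)
{
	int ret = 0;

	switch (init_state) {
	case TOY_ACPI_FINISHED:
		/*
		 * Entered from irq-remapping setup: irqs are disabled,
		 * so no mutexes and no sleeping allocations here.
		 */
		toy_early_enable_hw();
		init_state = TOY_ENABLED;
		break;
	case TOY_ENABLED:
		/* Runs later with irqs enabled: sleeping is fine now */
		register_syscore_ops(&toy_syscore_ops);
		ret = toy_init_pci();
		init_state = ret ? TOY_INIT_ERROR : TOY_PCI_INIT;
		break;
	default:
		break;
	}

	return ret;
}

A CONFIG_DEBUG_ATOMIC_SLEEP build catches this class of bug early; that is
the might_sleep() machinery visible at the top of the trace above.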