author    Linus Torvalds <torvalds@linux-foundation.org>    2017-11-14 16:43:27 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>    2017-11-14 16:43:27 -0800
commit    2cd83ba5bede2f72cc6c79a19a1bddf576b50e88 (patch)
tree      6a02f6f93f90f3fea419c3a283ced0543b603fd4
parent    670ffccb2f9183eb6cb32fe92257aea52b3f8a7d (diff)
parent    56f19441da39e5f27824bcbdf3f60980414b5bd0 (diff)
Merge tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio
Pull IOMMU updates from Alex Williamson:
 "As Joerg mentioned[1], he's out on paternity leave through the end of
  the year and I'm filling in for him in the interim:

   - Enforce MSI multiple IRQ alignment in AMD IOMMU

   - VT-d PASID error handling fixes

   - Add r8a7795 IPMMU support

   - Manage runtime PM links on exynos at {add,remove}_device callbacks

   - Fix Mediatek driver name to avoid conflict

   - Add terminate support to qcom fault handler

   - 64-bit IOVA optimizations

   - Simplify IOVA domain destruction, better use of rcache, and skip
     anchor nodes on copy

   - Convert to IOMMU TLB sync API in io-pgtable-arm{-v7s}

   - Drop command queue lock when waiting for CMD_SYNC completion on ARM
     SMMU implementations supporting MSI to cacheable memory

   - ipmmu-vmsa cleanup inspired by missed IOTLB sync callbacks

   - Fix sleeping lock with preemption disabled for RT

   - Dual MMU support for TI DRA7xx DSPs

   - Optional flush option on IOVA allocation, avoiding overhead when the
     caller can try other options (see the caller sketch after this log)

  [1] https://lkml.org/lkml/2017/10/22/72"

* tag 'iommu-v4.15-rc1' of git://github.com/awilliam/linux-vfio: (54 commits)
  iommu/iova: Use raw_cpu_ptr() instead of get_cpu_ptr() for ->fq
  iommu/mediatek: Fix driver name
  iommu/ipmmu-vmsa: Hook up r8a7795 DT matching code
  iommu/ipmmu-vmsa: Allow two bit SL0
  iommu/ipmmu-vmsa: Make IMBUSCTR setup optional
  iommu/ipmmu-vmsa: Write IMCTR twice
  iommu/ipmmu-vmsa: IPMMU device is 40-bit bus master
  iommu/ipmmu-vmsa: Make use of IOMMU_OF_DECLARE()
  iommu/ipmmu-vmsa: Enable multi context support
  iommu/ipmmu-vmsa: Add optional root device feature
  iommu/ipmmu-vmsa: Introduce features, break out alias
  iommu/ipmmu-vmsa: Unify ipmmu_ops
  iommu/ipmmu-vmsa: Clean up struct ipmmu_vmsa_iommu_priv
  iommu/ipmmu-vmsa: Simplify group allocation
  iommu/ipmmu-vmsa: Unify domain alloc/free
  iommu/ipmmu-vmsa: Fix return value check in ipmmu_find_group_dma()
  iommu/vt-d: Clear pasid table entry when memory unbound
  iommu/vt-d: Clear Page Request Overflow fault bit
  iommu/vt-d: Missing checks for pasid tables if allocation fails
  iommu/amd: Limit the IOVA page range to the specified addresses
  ...
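One headline item above, the optional flush on IOVA allocation, shows up at every alloc_iova_fast() call site in the diff below: the new bool argument tells the allocator whether it may flush the per-CPU rcaches and retry on failure. A minimal sketch of the resulting caller pattern, mirroring the AMD IOMMU hunk; the wrapper function and its name are illustrative, not part of the patch, and IOVA_PFN() is redefined here only for self-containment (amd_iommu.c defines the same macro):

#include <linux/dma-mapping.h>
#include <linux/iova.h>

#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)

static unsigned long example_alloc_iova(struct iova_domain *iovad,
					unsigned long pages, u64 dma_mask)
{
	unsigned long pfn = 0;

	/* Preferred window: stay below 32 bits, decline the rcache flush */
	if (dma_mask > DMA_BIT_MASK(32))
		pfn = alloc_iova_fast(iovad, pages,
				      IOVA_PFN(DMA_BIT_MASK(32)), false);

	/* Last resort: full mask, allow the allocator to flush and retry */
	if (!pfn)
		pfn = alloc_iova_fast(iovad, pages,
				      IOVA_PFN(dma_mask), true);

	return pfn;
}

The cheap sub-4GiB attempt avoids the flush cost because the caller still has another option; only the final full-mask attempt pays for it.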
-rw-r--r--  drivers/gpu/drm/tegra/drm.c           3
-rw-r--r--  drivers/gpu/host1x/dev.c              3
-rw-r--r--  drivers/iommu/amd_iommu.c            43
-rw-r--r--  drivers/iommu/arm-smmu-v3.c         214
-rw-r--r--  drivers/iommu/arm-smmu.c             31
-rw-r--r--  drivers/iommu/dma-iommu.c            24
-rw-r--r--  drivers/iommu/dmar.c                 10
-rw-r--r--  drivers/iommu/exynos-iommu.c         23
-rw-r--r--  drivers/iommu/intel-iommu.c          28
-rw-r--r--  drivers/iommu/intel-svm.c             4
-rw-r--r--  drivers/iommu/io-pgtable-arm-v7s.c    7
-rw-r--r--  drivers/iommu/io-pgtable-arm.c        7
-rw-r--r--  drivers/iommu/iova.c                220
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c          527
-rw-r--r--  drivers/iommu/mtk_iommu.c             7
-rw-r--r--  drivers/iommu/mtk_iommu_v1.c          2
-rw-r--r--  drivers/iommu/omap-iommu.c          375
-rw-r--r--  drivers/iommu/omap-iommu.h           30
-rw-r--r--  drivers/iommu/qcom_iommu.c           33
-rw-r--r--  drivers/misc/mic/scif/scif_rma.c      3
-rw-r--r--  include/linux/dmar.h                  1
-rw-r--r--  include/linux/intel-iommu.h           1
-rw-r--r--  include/linux/iova.h                 14
23 files changed, 983 insertions(+), 627 deletions(-)
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 597d563d636a..b822e484b7e5 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
order = __ffs(tegra->domain->pgsize_bitmap);
init_iova_domain(&tegra->carveout.domain, 1UL << order,
- carveout_start >> order,
- carveout_end >> order);
+ carveout_start >> order);
tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 7f22c5c37660..5267c62e8896 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev)
order = __ffs(host->domain->pgsize_bitmap);
init_iova_domain(&host->iova, 1UL << order,
- geometry->aperture_start >> order,
- geometry->aperture_end >> order);
+ geometry->aperture_start >> order);
host->iova_end = geometry->aperture_end;
}
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 9c848e36f209..7d5eb004091d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -63,7 +63,6 @@
/* IO virtual address start page frame number */
#define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
-#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
/* Reserved IOVA ranges */
#define MSI_RANGE_START (0xfee00000)
@@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev,
if (dma_mask > DMA_BIT_MASK(32))
pfn = alloc_iova_fast(&dma_dom->iovad, pages,
- IOVA_PFN(DMA_BIT_MASK(32)));
+ IOVA_PFN(DMA_BIT_MASK(32)), false);
if (!pfn)
- pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask));
+ pfn = alloc_iova_fast(&dma_dom->iovad, pages,
+ IOVA_PFN(dma_mask), true);
return (pfn << PAGE_SHIFT);
}
@@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
- init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
- IOVA_START_PFN, DMA_32BIT_PFN);
+ init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN);
if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
goto free_dma_dom;
@@ -2383,11 +2382,9 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
size_t size,
int dir)
{
- dma_addr_t flush_addr;
dma_addr_t i, start;
unsigned int pages;
- flush_addr = dma_addr;
pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
dma_addr &= PAGE_MASK;
start = dma_addr;
@@ -2696,8 +2693,7 @@ static int init_reserved_iova_ranges(void)
struct pci_dev *pdev = NULL;
struct iova *val;
- init_iova_domain(&reserved_iova_ranges, PAGE_SIZE,
- IOVA_START_PFN, DMA_32BIT_PFN);
+ init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN);
lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock,
&reserved_rbtree_key);
@@ -3155,7 +3151,7 @@ static void amd_iommu_apply_resv_region(struct device *dev,
unsigned long start, end;
start = IOVA_PFN(region->start);
- end = IOVA_PFN(region->start + region->length);
+ end = IOVA_PFN(region->start + region->length - 1);
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
}
@@ -3663,11 +3659,11 @@ out_unlock:
return table;
}
-static int alloc_irq_index(u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count, bool align)
{
struct irq_remap_table *table;
+ int index, c, alignment = 1;
unsigned long flags;
- int index, c;
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
@@ -3677,16 +3673,21 @@ static int alloc_irq_index(u16 devid, int count)
if (!table)
return -ENODEV;
+ if (align)
+ alignment = roundup_pow_of_two(count);
+
spin_lock_irqsave(&table->lock, flags);
/* Scan table for free entries */
- for (c = 0, index = table->min_index;
- index < MAX_IRQS_PER_TABLE;
- ++index) {
- if (!iommu->irte_ops->is_allocated(table, index))
+ for (index = ALIGN(table->min_index, alignment), c = 0;
+ index < MAX_IRQS_PER_TABLE;) {
+ if (!iommu->irte_ops->is_allocated(table, index)) {
c += 1;
- else
- c = 0;
+ } else {
+ c = 0;
+ index = ALIGN(index + 1, alignment);
+ continue;
+ }
if (c == count) {
for (; c != 0; --c)
@@ -3695,6 +3696,8 @@ static int alloc_irq_index(u16 devid, int count)
index -= count - 1;
goto out;
}
+
+ index++;
}
index = -ENOSPC;
@@ -4099,7 +4102,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
else
ret = -ENOMEM;
} else {
- index = alloc_irq_index(devid, nr_irqs);
+ bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI);
+
+ index = alloc_irq_index(devid, nr_irqs, align);
}
if (index < 0) {
pr_warn("Failed to allocate IRTE\n");
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index e67ba6c40faf..f122071688fd 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -316,6 +316,7 @@
#define ARM64_TCR_TBI0_MASK 0x1UL
#define CTXDESC_CD_0_AA64 (1UL << 41)
+#define CTXDESC_CD_0_S (1UL << 44)
#define CTXDESC_CD_0_R (1UL << 45)
#define CTXDESC_CD_0_A (1UL << 46)
#define CTXDESC_CD_0_ASET_SHIFT 47
@@ -377,7 +378,16 @@
#define CMDQ_SYNC_0_CS_SHIFT 12
#define CMDQ_SYNC_0_CS_NONE (0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ (1UL << CMDQ_SYNC_0_CS_SHIFT)
#define CMDQ_SYNC_0_CS_SEV (2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT 22
+#define CMDQ_SYNC_0_MSH_ISH (3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT 24
+#define CMDQ_SYNC_0_MSIATTR_OIWB (0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT 32
+#define CMDQ_SYNC_0_MSIDATA_MASK 0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT 0
+#define CMDQ_SYNC_1_MSIADDR_MASK 0xffffffffffffcUL
/* Event queue */
#define EVTQ_ENT_DWORDS 4
@@ -408,20 +418,12 @@
/* High-level queue structures */
#define ARM_SMMU_POLL_TIMEOUT_US 100
-#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_TIMEOUT_US 1000000 /* 1s! */
+#define ARM_SMMU_CMDQ_SYNC_SPIN_COUNT 10
#define MSI_IOVA_BASE 0x8000000
#define MSI_IOVA_LENGTH 0x100000
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_HISILICON_HI161X
-#define ACPI_IORT_SMMU_HISILICON_HI161X 0x1
-#endif
-
-#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
-#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2
-#endif
-
static bool disable_bypass;
module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
@@ -504,6 +506,10 @@ struct arm_smmu_cmdq_ent {
} pri;
#define CMDQ_OP_CMD_SYNC 0x46
+ struct {
+ u32 msidata;
+ u64 msiaddr;
+ } sync;
};
};
@@ -604,6 +610,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_TRANS_S2 (1 << 10)
#define ARM_SMMU_FEAT_STALLS (1 << 11)
#define ARM_SMMU_FEAT_HYP (1 << 12)
+#define ARM_SMMU_FEAT_STALL_FORCE (1 << 13)
u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
@@ -616,6 +623,7 @@ struct arm_smmu_device {
int gerr_irq;
int combined_irq;
+ atomic_t sync_nr;
unsigned long ias; /* IPA */
unsigned long oas; /* PA */
@@ -634,6 +642,8 @@ struct arm_smmu_device {
struct arm_smmu_strtab_cfg strtab_cfg;
+ u32 sync_count;
+
/* IOMMU core code handle */
struct iommu_device iommu;
};
@@ -757,26 +767,29 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
* Wait for the SMMU to consume items. If drain is true, wait until the queue
* is empty. Otherwise, wait until there is at least one free slot.
*/
-static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
{
ktime_t timeout;
- unsigned int delay = 1;
+ unsigned int delay = 1, spin_cnt = 0;
- /* Wait longer if it's queue drain */
- timeout = ktime_add_us(ktime_get(), drain ?
- ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
+ /* Wait longer if it's a CMD_SYNC */
+ timeout = ktime_add_us(ktime_get(), sync ?
+ ARM_SMMU_CMDQ_SYNC_TIMEOUT_US :
ARM_SMMU_POLL_TIMEOUT_US);
- while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
+ while (queue_sync_cons(q), (sync ? !queue_empty(q) : queue_full(q))) {
if (ktime_compare(ktime_get(), timeout) > 0)
return -ETIMEDOUT;
if (wfe) {
wfe();
- } else {
+ } else if (++spin_cnt < ARM_SMMU_CMDQ_SYNC_SPIN_COUNT) {
cpu_relax();
+ continue;
+ } else {
udelay(delay);
delay *= 2;
+ spin_cnt = 0;
}
}
@@ -878,7 +891,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
}
break;
case CMDQ_OP_CMD_SYNC:
- cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+ if (ent->sync.msiaddr)
+ cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+ else
+ cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+ cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+ cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+ cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
break;
default:
return -ENOENT;
@@ -936,13 +955,22 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
+static void arm_smmu_cmdq_insert_cmd(struct arm_smmu_device *smmu, u64 *cmd)
+{
+ struct arm_smmu_queue *q = &smmu->cmdq.q;
+ bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+
+ while (queue_insert_raw(q, cmd) == -ENOSPC) {
+ if (queue_poll_cons(q, false, wfe))
+ dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
+ }
+}
+
static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
u64 cmd[CMDQ_ENT_DWORDS];
unsigned long flags;
- bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
- struct arm_smmu_queue *q = &smmu->cmdq.q;
if (arm_smmu_cmdq_build_cmd(cmd, ent)) {
dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n",
@@ -951,14 +979,76 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
}
spin_lock_irqsave(&smmu->cmdq.lock, flags);
- while (queue_insert_raw(q, cmd) == -ENOSPC) {
- if (queue_poll_cons(q, false, wfe))
- dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
- }
+ arm_smmu_cmdq_insert_cmd(smmu, cmd);
+ spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+}
- if (ent->opcode == CMDQ_OP_CMD_SYNC && queue_poll_cons(q, true, wfe))
- dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int __arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+ ktime_t timeout;
+ u32 val;
+
+ timeout = ktime_add_us(ktime_get(), ARM_SMMU_CMDQ_SYNC_TIMEOUT_US);
+ val = smp_cond_load_acquire(&smmu->sync_count,
+ (int)(VAL - sync_idx) >= 0 ||
+ !ktime_before(ktime_get(), timeout));
+
+ return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
+static int __arm_smmu_cmdq_issue_sync_msi(struct arm_smmu_device *smmu)
+{
+ u64 cmd[CMDQ_ENT_DWORDS];
+ unsigned long flags;
+ struct arm_smmu_cmdq_ent ent = {
+ .opcode = CMDQ_OP_CMD_SYNC,
+ .sync = {
+ .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
+ .msiaddr = virt_to_phys(&smmu->sync_count),
+ },
+ };
+
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+ spin_lock_irqsave(&smmu->cmdq.lock, flags);
+ arm_smmu_cmdq_insert_cmd(smmu, cmd);
+ spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+ return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
+}
+
+static int __arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+ u64 cmd[CMDQ_ENT_DWORDS];
+ unsigned long flags;
+ bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
+ int ret;
+
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+
+ spin_lock_irqsave(&smmu->cmdq.lock, flags);
+ arm_smmu_cmdq_insert_cmd(smmu, cmd);
+ ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
+
+ return ret;
+}
+
+static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
+{
+ int ret;
+ bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+ (smmu->features & ARM_SMMU_FEAT_COHERENCY);
+
+ ret = msi ? __arm_smmu_cmdq_issue_sync_msi(smmu)
+ : __arm_smmu_cmdq_issue_sync(smmu);
+ if (ret)
+ dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
}
/* Context descriptor manipulation functions */
@@ -996,6 +1086,11 @@ static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu,
CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET_PRIVATE |
CTXDESC_CD_0_AA64 | (u64)cfg->cd.asid << CTXDESC_CD_0_ASID_SHIFT |
CTXDESC_CD_0_V;
+
+ /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */
+ if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE)
+ val |= CTXDESC_CD_0_S;
+
cfg->cdptr[0] = cpu_to_le64(val);
val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK << CTXDESC_CD_1_TTB0_SHIFT;
@@ -1029,8 +1124,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
};
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- cmd.opcode = CMDQ_OP_CMD_SYNC;
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
@@ -1094,7 +1188,11 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
<< STRTAB_STE_1_SHCFG_SHIFT);
dst[2] = 0; /* Nuke the VMID */
- if (ste_live)
+ /*
+ * The SMMU can perform negative caching, so we must sync
+ * the STE regardless of whether the old value was live.
+ */
+ if (smmu)
arm_smmu_sync_ste_for_sid(smmu, sid);
return;
}
@@ -1112,7 +1210,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
#endif
STRTAB_STE_1_STRW_NSEL1 << STRTAB_STE_1_STRW_SHIFT);
- if (smmu->features & ARM_SMMU_FEAT_STALLS)
+ if (smmu->features & ARM_SMMU_FEAT_STALLS &&
+ !(smmu->features & ARM_SMMU_FEAT_STALL_FORCE))
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
val |= (ste->s1_cfg->cdptr_dma & STRTAB_STE_0_S1CTXPTR_MASK
@@ -1275,12 +1374,6 @@ static irqreturn_t arm_smmu_priq_thread(int irq, void *dev)
return IRQ_HANDLED;
}
-static irqreturn_t arm_smmu_cmdq_sync_handler(int irq, void *dev)
-{
- /* We don't actually use CMD_SYNC interrupts for anything */
- return IRQ_HANDLED;
-}
-
static int arm_smmu_device_disable(struct arm_smmu_device *smmu);
static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
@@ -1313,10 +1406,8 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
if (active & GERROR_MSI_EVTQ_ABT_ERR)
dev_warn(smmu->dev, "EVTQ MSI write aborted\n");
- if (active & GERROR_MSI_CMDQ_ABT_ERR) {
+ if (active & GERROR_MSI_CMDQ_ABT_ERR)
dev_warn(smmu->dev, "CMDQ MSI write aborted\n");
- arm_smmu_cmdq_sync_handler(irq, smmu->dev);
- }
if (active & GERROR_PRIQ_ABT_ERR)
dev_err(smmu->dev, "PRIQ write aborted -- events may have been lost\n");
@@ -1345,17 +1436,13 @@ static irqreturn_t arm_smmu_combined_irq_thread(int irq, void *dev)
static irqreturn_t arm_smmu_combined_irq_handler(int irq, void *dev)
{
arm_smmu_gerror_handler(irq, dev);
- arm_smmu_cmdq_sync_handler(irq, dev);
return IRQ_WAKE_THREAD;
}
/* IO_PGTABLE API */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
- struct arm_smmu_cmdq_ent cmd;
-
- cmd.opcode = CMDQ_OP_CMD_SYNC;
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static void arm_smmu_tlb_sync(void *cookie)
@@ -1743,6 +1830,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+ struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
+
+ if (smmu)
+ __arm_smmu_tlb_sync(smmu);
+}
+
static phys_addr_t
arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
{
@@ -1963,6 +2058,8 @@ static struct iommu_ops arm_smmu_ops = {
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
.map_sg = default_iommu_map_sg,
+ .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
@@ -2147,6 +2244,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
{
int ret;
+ atomic_set(&smmu->sync_nr, 0);
ret = arm_smmu_init_queues(smmu);
if (ret)
return ret;
@@ -2265,15 +2363,6 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
dev_warn(smmu->dev, "failed to enable evtq irq\n");
}
- irq = smmu->cmdq.q.irq;
- if (irq) {
- ret = devm_request_irq(smmu->dev, irq,
- arm_smmu_cmdq_sync_handler, 0,
- "arm-smmu-v3-cmdq-sync", smmu);
- if (ret < 0)
- dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
- }
-
irq = smmu->gerr_irq;
if (irq) {
ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
@@ -2399,8 +2488,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Invalidate any cached configuration */
cmd.opcode = CMDQ_OP_CFGI_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- cmd.opcode = CMDQ_OP_CMD_SYNC;
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
/* Invalidate any stale TLB entries */
if (smmu->features & ARM_SMMU_FEAT_HYP) {
@@ -2410,8 +2498,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- cmd.opcode = CMDQ_OP_CMD_SYNC;
- arm_smmu_cmdq_issue_cmd(smmu, &cmd);
+ arm_smmu_cmdq_issue_sync(smmu);
/* Event queue */
writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
@@ -2532,13 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
* register, but warn on mismatch.
*/
if (!!(reg & IDR0_COHACC) != coherent)
- dev_warn(smmu->dev, "IDR0.COHACC overridden by dma-coherent property (%s)\n",
+ dev_warn(smmu->dev, "IDR0.COHACC overridden by FW configuration (%s)\n",
coherent ? "true" : "false");
switch (reg & IDR0_STALL_MODEL_MASK << IDR0_STALL_MODEL_SHIFT) {
- case IDR0_STALL_MODEL_STALL:
- /* Fallthrough */
case IDR0_STALL_MODEL_FORCE:
+ smmu->features |= ARM_SMMU_FEAT_STALL_FORCE;
+ /* Fallthrough */
+ case IDR0_STALL_MODEL_STALL:
smmu->features |= ARM_SMMU_FEAT_STALLS;
}
@@ -2665,7 +2753,7 @@ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break;
- case ACPI_IORT_SMMU_HISILICON_HI161X:
+ case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break;
}
@@ -2783,10 +2871,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (irq > 0)
smmu->priq.q.irq = irq;
- irq = platform_get_irq_byname(pdev, "cmdq-sync");
- if (irq > 0)
- smmu->cmdq.q.irq = irq;
-
irq = platform_get_irq_byname(pdev, "gerror");
if (irq > 0)
smmu->gerr_irq = irq;
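
The MSI-based CMD_SYNC path added above (__arm_smmu_sync_poll_msi()) waits with smp_cond_load_acquire() until sync_count reaches the command's msidata, deciding "reached" via a signed 32-bit difference; as the in-tree comment notes, producer and waiter can be at most one queue (2^20 entries) apart, so the test survives a u32 wrap. A self-contained sketch of just that comparison, with illustrative names:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* True once the running counter has reached or passed sync_idx. */
static bool sync_idx_reached(uint32_t counter, uint32_t sync_idx)
{
	return (int32_t)(counter - sync_idx) >= 0;
}

int main(void)
{
	/* Counter wrapped past zero while waiting on index 0xfffffffe:
	 * 0x00000002 - 0xfffffffe == +4, so the sync is complete. */
	assert(sync_idx_reached(0x00000002u, 0xfffffffeu));

	/* Waited-for index is 4 ahead of the counter: not complete yet. */
	assert(!sync_idx_reached(0xfffffffeu, 0x00000002u));
	return 0;
}

A plain `counter >= sync_idx` would misfire at the wrap point; the bounded-distance argument is what makes the signed subtraction safe.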
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 3bdb799d3b4b..78d4c6b8f1ba 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -59,6 +59,7 @@
#define ARM_MMU500_ACTLR_CPRE (1 << 1)
#define ARM_MMU500_ACR_CACHE_LOCK (1 << 26)
+#define ARM_MMU500_ACR_S2CRB_TLBEN (1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN (1 << 8)
#define TLB_LOOP_TIMEOUT 1000000 /* 1s! */
@@ -119,14 +120,6 @@ enum arm_smmu_implementation {
CAVIUM_SMMUV2,
};
-/* Until ACPICA headers cover IORT rev. C */
-#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
-#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
-#endif
-#ifndef ACPI_IORT_SMMU_CAVIUM_THUNDERX
-#define ACPI_IORT_SMMU_CAVIUM_THUNDERX 0x5
-#endif
-
struct arm_smmu_s2cr {
struct iommu_group *group;
int count;
@@ -250,6 +243,7 @@ enum arm_smmu_domain_stage {
struct arm_smmu_domain {
struct arm_smmu_device *smmu;
struct io_pgtable_ops *pgtbl_ops;
+ const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
struct mutex init_mutex; /* Protects smmu pointer */
@@ -735,7 +729,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
enum io_pgtable_fmt fmt;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
- const struct iommu_gather_ops *tlb_ops;
mutex_lock(&smmu_domain->init_mutex);
if (smmu_domain->smmu)
@@ -813,7 +806,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
ias = min(ias, 32UL);
oas = min(oas, 32UL);
}
- tlb_ops = &arm_smmu_s1_tlb_ops;
+ smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
break;
case ARM_SMMU_DOMAIN_NESTED:
/*
@@ -833,9 +826,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
oas = min(oas, 40UL);
}
if (smmu->version == ARM_SMMU_V2)
- tlb_ops = &arm_smmu_s2_tlb_ops_v2;
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
else
- tlb_ops = &arm_smmu_s2_tlb_ops_v1;
+ smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
break;
default:
ret = -EINVAL;
@@ -863,7 +856,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
.pgsize_bitmap = smmu->pgsize_bitmap,
.ias = ias,
.oas = oas,
- .tlb = tlb_ops,
+ .tlb = smmu_domain->tlb_ops,
.iommu_dev = smmu->dev,
};
@@ -1259,6 +1252,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->tlb_ops)
+ smmu_domain->tlb_ops->tlb_sync(smmu_domain);
+}
+
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
dma_addr_t iova)
{
@@ -1562,6 +1563,8 @@ static struct iommu_ops arm_smmu_ops = {
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
.map_sg = default_iommu_map_sg,
+ .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
.remove_device = arm_smmu_remove_device,
@@ -1606,7 +1609,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
* Allow unmatched Stream IDs to allocate bypass
* TLB entries for reduced latency.
*/
- reg |= ARM_MMU500_ACR_SMTNMB_TLBEN;
+ reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9d1cebe7f6cb..25914d36c5ac 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
/* ...then finally give it a kicking to make sure it fits */
base_pfn = max_t(unsigned long, base_pfn,
domain->geometry.aperture_start >> order);
- end_pfn = min_t(unsigned long, end_pfn,
- domain->geometry.aperture_end >> order);
}
- /*
- * PCI devices may have larger DMA masks, but still prefer allocating
- * within a 32-bit mask to avoid DAC addressing. Such limitations don't
- * apply to the typical platform device, so for those we may as well
- * leave the cache limit at the top of their range to save an rb_last()
- * traversal on every allocation.
- */
- if (dev && dev_is_pci(dev))
- end_pfn &= DMA_BIT_MASK(32) >> order;
/* start_pfn is always nonzero for an already-initialised domain */
if (iovad->start_pfn) {
@@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
pr_warn("Incompatible range for DMA domain\n");
return -EFAULT;
}
- /*
- * If we have devices with different DMA masks, move the free
- * area cache limit down for the benefit of the smaller one.
- */
- iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn);
return 0;
}
- init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn);
+ init_iova_domain(iovad, 1UL << order, base_pfn);
if (!dev)
return 0;
@@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
/* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
- iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift);
+ iova = alloc_iova_fast(iovad, iova_len,
+ DMA_BIT_MASK(32) >> shift, false);
if (!iova)
- iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift);
+ iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift,
+ true);
return (dma_addr_t)iova << shift;
}
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 57c920c1372d..9a7ffd13c7f0 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void)
dmar_free_pci_notify_info(info);
}
}
-
- bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
}
return dmar_dev_scope_status;
}
+void dmar_register_bus_notifier(void)
+{
+ bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
+}
+
int __init dmar_table_init(void)
{
@@ -1676,7 +1679,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id)
raw_spin_lock_irqsave(&iommu->register_lock, flag);
}
- writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);
+ writel(DMA_FSTS_PFO | DMA_FSTS_PPF | DMA_FSTS_PRO,
+ iommu->reg + DMAR_FSTS_REG);
unlock_exit:
raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 25c2c75f5332..79c45650f8de 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -263,6 +263,7 @@ struct exynos_iommu_domain {
struct sysmmu_drvdata {
struct device *sysmmu; /* SYSMMU controller device */
struct device *master; /* master device (owner) */
+ struct device_link *link; /* runtime PM link to master */
void __iomem *sfrbase; /* our registers */
struct clk *clk; /* SYSMMU's clock */
struct clk *aclk; /* SYSMMU's aclk clock */
@@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev)
static int exynos_iommu_add_device(struct device *dev)
{
+ struct exynos_iommu_owner *owner = dev->archdata.iommu;
+ struct sysmmu_drvdata *data;
struct iommu_group *group;
if (!has_sysmmu(dev))
@@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev)
if (IS_ERR(group))