author    Linus Torvalds <torvalds@linux-foundation.org>  2017-07-12 10:00:04 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-07-12 10:00:04 -0700
commit    fb4e3beeffa47619985f190663c6ef424f063a22 (patch)
tree      72a9c5f7a49a003daddea7cb0f66ed8c2137f21f
parent    6b1c776d3efbda31085b6a9f3bc7f774511fafd9 (diff)
parent    6a7086431fa18df7d03b1ed0126426c79b38dc8c (diff)
Merge tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel:

 "This update comes with:

   - Support for lockless operation in the ARM io-pgtable code.

     This is an important step to solve the scalability problems in the
     common dma-iommu code for ARM

   - Some Errata workarounds for ARM SMMU implementations

   - Rewrite of the deferred IO/TLB flush code in the AMD IOMMU driver.

     The code suffered from very high flush rates; with the new
     implementation the flush rate is down to ~1% of what it was before

   - Support for amd_iommu=off when booting with kexec.

     The problem here was that the IOMMU driver bailed out early without
     disabling the iommu hardware, if it was enabled in the old kernel

   - The Rockchip IOMMU driver is now available on ARM64

   - Align the return value of the iommu_ops->device_group call-backs to
     not miss error values

   - Preempt-disable optimizations in the Intel VT-d and common IOVA
     code to help Linux-RT

   - Various other small cleanups and fixes"

* tag 'iommu-updates-v4.13' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (60 commits)
  iommu/vt-d: Constify intel_dma_ops
  iommu: Warn once when device_group callback returns NULL
  iommu/omap: Return ERR_PTR in device_group call-back
  iommu: Return ERR_PTR() values from device_group call-backs
  iommu/s390: Use iommu_group_get_for_dev() in s390_iommu_add_device()
  iommu/vt-d: Don't disable preemption while accessing deferred_flush()
  iommu/iova: Don't disable preempt around this_cpu_ptr()
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #126
  iommu/arm-smmu-v3: Enable ACPI based HiSilicon CMD_PREFETCH quirk(erratum 161010701)
  iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2 erratum #74
  ACPI/IORT: Fixup SMMUv3 resource size for Cavium ThunderX2 SMMUv3 model
  iommu/arm-smmu-v3, acpi: Add temporary Cavium SMMU-V3 IORT model number definitions
  iommu/io-pgtable-arm: Use dma_wmb() instead of wmb() when publishing table
  iommu/io-pgtable: depend on !GENERIC_ATOMIC64 when using COMPILE_TEST with LPAE
  iommu/arm-smmu-v3: Remove io-pgtable spinlock
  iommu/arm-smmu: Remove io-pgtable spinlock
  iommu/io-pgtable-arm-v7s: Support lockless operation
  iommu/io-pgtable-arm: Support lockless operation
  iommu/io-pgtable: Introduce explicit coherency
  iommu/io-pgtable-arm-v7s: Refactor split_blk_unmap
  ...
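One of the items above, aligning the return values of the iommu_ops->device_group call-backs, amounts to returning ERR_PTR() codes instead of NULL so that iommu_group_get_for_dev() can propagate the real error (and, per the shortlog, a NULL return now only triggers a one-time warning). The fragment below is only an illustrative sketch of that convention; the driver name and the my_* helper are made up and are not part of this series.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/iommu.h>

/* Hypothetical check; a real driver would test its own topology data here. */
static bool my_device_is_translated(struct device *dev)
{
	return dev->of_node != NULL;
}

/* Hypothetical device_group callback following the new convention. */
static struct iommu_group *my_iommu_device_group(struct device *dev)
{
	if (!my_device_is_translated(dev))
		return ERR_PTR(-ENODEV);	/* error pointer, not NULL */

	/* After this series, generic_device_group() also propagates ERR_PTR(). */
	return generic_device_group(dev);
}

iommu_group_get_for_dev() hands such an ERR_PTR() straight back to the bus code, whereas a NULL used to be folded into a generic failure.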
-rw-r--r--  Documentation/arm64/silicon-errata.txt                   |   5
-rw-r--r--  Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt  |  12
-rw-r--r--  drivers/acpi/arm64/iort.c                                |  83
-rw-r--r--  drivers/iommu/Kconfig                                    |   6
-rw-r--r--  drivers/iommu/amd_iommu.c                                | 458
-rw-r--r--  drivers/iommu/amd_iommu_init.c                           |  44
-rw-r--r--  drivers/iommu/amd_iommu_types.h                          |   3
-rw-r--r--  drivers/iommu/arm-smmu-v3.c                              | 229
-rw-r--r--  drivers/iommu/arm-smmu.c                                 |  64
-rw-r--r--  drivers/iommu/dma-iommu.c                                |   2
-rw-r--r--  drivers/iommu/intel-iommu.c                              |  26
-rw-r--r--  drivers/iommu/intel-svm.c                                |  30
-rw-r--r--  drivers/iommu/intel_irq_remapping.c                      |   4
-rw-r--r--  drivers/iommu/io-pgtable-arm-v7s.c                       | 183
-rw-r--r--  drivers/iommu/io-pgtable-arm.c                           | 189
-rw-r--r--  drivers/iommu/io-pgtable.h                               |   6
-rw-r--r--  drivers/iommu/iommu.c                                    |  17
-rw-r--r--  drivers/iommu/iova.c                                     |  30
-rw-r--r--  drivers/iommu/ipmmu-vmsa.c                               | 353
-rw-r--r--  drivers/iommu/omap-iommu.c                               |   2
-rw-r--r--  drivers/iommu/s390-iommu.c                               |  15
-rw-r--r--  include/linux/intel-svm.h                                |  20
22 files changed, 1233 insertions, 548 deletions
diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
index f5f93dca54b7..66e8ce14d23d 100644
--- a/Documentation/arm64/silicon-errata.txt
+++ b/Documentation/arm64/silicon-errata.txt
@@ -61,12 +61,15 @@ stable kernels.
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
| Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
-| Cavium | ThunderX SMMUv2 | #27704 | N/A |
| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 |
+| Cavium | ThunderX SMMUv2 | #27704 | N/A |
+| Cavium | ThunderX2 SMMUv3| #74 | N/A |
+| Cavium | ThunderX2 SMMUv3| #126 | N/A |
| | | | |
| Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
| | | | |
| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
+| Hisilicon | Hip0{6,7} | #161010701 | N/A |
| | | | |
| Qualcomm Tech. | Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
index be57550e14e4..c9abbf3e4f68 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
@@ -26,6 +26,12 @@ the PCIe specification.
* "priq" - PRI Queue not empty
* "cmdq-sync" - CMD_SYNC complete
* "gerror" - Global Error activated
+ * "combined" - The combined interrupt is optional,
+ and should only be provided if the
+ hardware supports just a single,
+ combined interrupt line.
+ If provided, then the combined interrupt
+ will be used in preference to any others.
- #iommu-cells : See the generic IOMMU binding described in
devicetree/bindings/pci/pci-iommu.txt
@@ -49,6 +55,12 @@ the PCIe specification.
- hisilicon,broken-prefetch-cmd
: Avoid sending CMD_PREFETCH_* commands to the SMMU.
+- cavium,cn9900-broken-page1-regspace
+ : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
+ PRIQ_PROD/CONS register access with page 0 offsets.
+ Set for Cavium ThunderX2 silicon that doesn't support
+ SMMU page1 register space.
+
** Example
smmu@2b400000 {
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index d048f72c23f8..a3215ee671c1 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -31,6 +31,11 @@
#define IORT_IOMMU_TYPE ((1 << ACPI_IORT_NODE_SMMU) | \
(1 << ACPI_IORT_NODE_SMMU_V3))
+/* Until ACPICA headers cover IORT rev. C */
+#ifndef ACPI_IORT_SMMU_V3_CAVIUM_CN99XX
+#define ACPI_IORT_SMMU_V3_CAVIUM_CN99XX 0x2
+#endif
+
struct iort_its_msi_chip {
struct list_head list;
struct fwnode_handle *fw_node;
@@ -819,6 +824,36 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
return num_res;
}
+static bool arm_smmu_v3_is_combined_irq(struct acpi_iort_smmu_v3 *smmu)
+{
+ /*
+	 * Cavium ThunderX2 implementation doesn't support unique
+	 * irq lines. Use a single irq line for all the SMMUv3 interrupts.
+ */
+ if (smmu->model != ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+ return false;
+
+ /*
+ * ThunderX2 doesn't support MSIs from the SMMU, so we're checking
+ * SPI numbers here.
+ */
+ return smmu->event_gsiv == smmu->pri_gsiv &&
+ smmu->event_gsiv == smmu->gerr_gsiv &&
+ smmu->event_gsiv == smmu->sync_gsiv;
+}
+
+static unsigned long arm_smmu_v3_resource_size(struct acpi_iort_smmu_v3 *smmu)
+{
+ /*
+ * Override the size, for Cavium ThunderX2 implementation
+ * which doesn't support the page 1 SMMU register space.
+ */
+ if (smmu->model == ACPI_IORT_SMMU_V3_CAVIUM_CN99XX)
+ return SZ_64K;
+
+ return SZ_128K;
+}
+
static void __init arm_smmu_v3_init_resources(struct resource *res,
struct acpi_iort_node *node)
{
@@ -829,30 +864,38 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
res[num_res].start = smmu->base_address;
- res[num_res].end = smmu->base_address + SZ_128K - 1;
+ res[num_res].end = smmu->base_address +
+ arm_smmu_v3_resource_size(smmu) - 1;
res[num_res].flags = IORESOURCE_MEM;
num_res++;
+ if (arm_smmu_v3_is_combined_irq(smmu)) {
+ if (smmu->event_gsiv)
+ acpi_iort_register_irq(smmu->event_gsiv, "combined",
+ ACPI_EDGE_SENSITIVE,
+ &res[num_res++]);
+ } else {
- if (smmu->event_gsiv)
- acpi_iort_register_irq(smmu->event_gsiv, "eventq",
- ACPI_EDGE_SENSITIVE,
- &res[num_res++]);
-
- if (smmu->pri_gsiv)
- acpi_iort_register_irq(smmu->pri_gsiv, "priq",
- ACPI_EDGE_SENSITIVE,
- &res[num_res++]);
-
- if (smmu->gerr_gsiv)
- acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
- ACPI_EDGE_SENSITIVE,
- &res[num_res++]);
-
- if (smmu->sync_gsiv)
- acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
- ACPI_EDGE_SENSITIVE,
- &res[num_res++]);
+ if (smmu->event_gsiv)
+ acpi_iort_register_irq(smmu->event_gsiv, "eventq",
+ ACPI_EDGE_SENSITIVE,
+ &res[num_res++]);
+
+ if (smmu->pri_gsiv)
+ acpi_iort_register_irq(smmu->pri_gsiv, "priq",
+ ACPI_EDGE_SENSITIVE,
+ &res[num_res++]);
+
+ if (smmu->gerr_gsiv)
+ acpi_iort_register_irq(smmu->gerr_gsiv, "gerror",
+ ACPI_EDGE_SENSITIVE,
+ &res[num_res++]);
+
+ if (smmu->sync_gsiv)
+ acpi_iort_register_irq(smmu->sync_gsiv, "cmdq-sync",
+ ACPI_EDGE_SENSITIVE,
+ &res[num_res++]);
+ }
}
static bool __init arm_smmu_v3_is_coherent(struct acpi_iort_node *node)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 6ee3a25ae731..f73ff28f77e2 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE
config IOMMU_IO_PGTABLE_LPAE
bool "ARMv7/v8 Long Descriptor Format"
select IOMMU_IO_PGTABLE
- depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST)
+ depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64))
help
Enable support for the ARM long descriptor pagetable format.
This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -219,7 +219,7 @@ config OMAP_IOMMU_DEBUG
config ROCKCHIP_IOMMU
bool "Rockchip IOMMU Support"
- depends on ARM
+ depends on ARM || ARM64
depends on ARCH_ROCKCHIP || COMPILE_TEST
select IOMMU_API
select ARM_DMA_USE_IOMMU
@@ -274,7 +274,7 @@ config EXYNOS_IOMMU_DEBUG
config IPMMU_VMSA
bool "Renesas VMSA-compatible IPMMU"
- depends on ARM_LPAE
+ depends on ARM || IOMMU_DMA
depends on ARCH_RENESAS || COMPILE_TEST
select IOMMU_API
select IOMMU_IO_PGTABLE_LPAE
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f16d0f26ee24..688e77576e5a 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -91,25 +91,6 @@ LIST_HEAD(ioapic_map);
LIST_HEAD(hpet_map);
LIST_HEAD(acpihid_map);
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
- unsigned long iova_pfn;
- unsigned long pages;
- struct dma_ops_domain *dma_dom;
-};
-
-struct flush_queue {
- spinlock_t lock;
- unsigned next;
- struct flush_queue_entry *entries;
-};
-
-static DEFINE_PER_CPU(struct flush_queue, flush_queue);
-
-static atomic_t queue_timer_on;
-static struct timer_list queue_timer;
-
/*
* Domain for untranslated devices - only allocated
* if iommu=pt passed on kernel cmd line.
@@ -140,6 +121,8 @@ struct iommu_dev_data {
PPR completions */
u32 errata; /* Bitmap for errata to apply */
bool use_vapic; /* Enable device to use vapic mode */
+
+ struct ratelimit_state rs; /* Ratelimit IOPF messages */
};
/*
@@ -155,6 +138,20 @@ static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
+#define FLUSH_QUEUE_SIZE 256
+
+struct flush_queue_entry {
+ unsigned long iova_pfn;
+ unsigned long pages;
+ u64 counter; /* Flush counter when this entry was added to the queue */
+};
+
+struct flush_queue {
+ struct flush_queue_entry *entries;
+ unsigned head, tail;
+ spinlock_t lock;
+};
+
/*
* Data container for a dma_ops specific protection domain
*/
@@ -164,6 +161,36 @@ struct dma_ops_domain {
/* IOVA RB-Tree */
struct iova_domain iovad;
+
+ struct flush_queue __percpu *flush_queue;
+
+ /*
+	 * We need two counters here to be race-free wrt. IOTLB flushing and
+ * adding entries to the flush queue.
+ *
+ * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
+ * New entries added to the flush ring-buffer get their 'counter' value
+ * from here. This way we can make sure that entries added to the queue
+ * (or other per-cpu queues of the same domain) while the TLB is about
+ * to be flushed are not considered to be flushed already.
+ */
+ atomic64_t flush_start_cnt;
+
+ /*
+ * The flush_finish_cnt is incremented when an IOTLB flush is complete.
+ * This value is always smaller than flush_start_cnt. The queue_add
+ * function frees all IOVAs that have a counter value smaller than
+ * flush_finish_cnt. This makes sure that we only free IOVAs that are
+ * flushed out of the IOTLB of the domain.
+ */
+ atomic64_t flush_finish_cnt;
+
+ /*
+ * Timer to make sure we don't keep IOVAs around unflushed
+ * for too long
+ */
+ struct timer_list flush_timer;
+ atomic_t flush_timer_on;
};
static struct iova_domain reserved_iova_ranges;
@@ -255,6 +282,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid)
list_add_tail(&dev_data->dev_data_list, &dev_data_list);
spin_unlock_irqrestore(&dev_data_list_lock, flags);
+ ratelimit_default_init(&dev_data->rs);
+
return dev_data;
}
@@ -553,6 +582,29 @@ static void dump_command(unsigned long phys_addr)
pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]);
}
+static void amd_iommu_report_page_fault(u16 devid, u16 domain_id,
+ u64 address, int flags)
+{
+ struct iommu_dev_data *dev_data = NULL;
+ struct pci_dev *pdev;
+
+ pdev = pci_get_bus_and_slot(PCI_BUS_NUM(devid), devid & 0xff);
+ if (pdev)
+ dev_data = get_dev_data(&pdev->dev);
+
+ if (dev_data && __ratelimit(&dev_data->rs)) {
+ dev_err(&pdev->dev, "AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ domain_id, address, flags);
+ } else if (printk_ratelimit()) {
+ pr_err("AMD-Vi: Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n",
+ PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
+ domain_id, address, flags);
+ }
+
+ if (pdev)
+ pci_dev_put(pdev);
+}
+
static void iommu_print_event(struct amd_iommu *iommu, void *__evt)
{
int type, devid, domid, flags;
@@ -577,7 +629,12 @@ retry:
goto retry;
}
- printk(KERN_ERR "AMD-Vi: Event logged [");
+ if (type == EVENT_TYPE_IO_FAULT) {
+ amd_iommu_report_page_fault(devid, domid, address, flags);
+ return;
+ } else {
+ printk(KERN_ERR "AMD-Vi: Event logged [");
+ }
switch (type) {
case EVENT_TYPE_ILL_DEV:
@@ -587,12 +644,6 @@ retry:
address, flags);
dump_dte_entry(devid);
break;
- case EVENT_TYPE_IO_FAULT:
- printk("IO_PAGE_FAULT device=%02x:%02x.%x "
- "domain=0x%04x address=0x%016llx flags=0x%04x]\n",
- PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid),
- domid, address, flags);
- break;
case EVENT_TYPE_DEV_TAB_ERR:
printk("DEV_TAB_HARDWARE_ERROR device=%02x:%02x.%x "
"address=0x%016llx flags=0x%04x]\n",
@@ -850,19 +901,20 @@ static int wait_on_sem(volatile u64 *sem)
}
static void copy_cmd_to_buffer(struct amd_iommu *iommu,
- struct iommu_cmd *cmd,
- u32 tail)
+ struct iommu_cmd *cmd)
{
u8 *target;
- target = iommu->cmd_buf + tail;
- tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
+ target = iommu->cmd_buf + iommu->cmd_buf_tail;
+
+ iommu->cmd_buf_tail += sizeof(*cmd);
+ iommu->cmd_buf_tail %= CMD_BUFFER_SIZE;
/* Copy command to buffer */
memcpy(target, cmd, sizeof(*cmd));
/* Tell the IOMMU about it */
- writel(tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ writel(iommu->cmd_buf_tail, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
}
static void build_completion_wait(struct iommu_cmd *cmd, u64 address)
@@ -1020,33 +1072,34 @@ static int __iommu_queue_command_sync(struct amd_iommu *iommu,
struct iommu_cmd *cmd,
bool sync)
{
- u32 left, tail, head, next_tail;
+ unsigned int count = 0;
+ u32 left, next_tail;
+ next_tail = (iommu->cmd_buf_tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
again:
-
- head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
- tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
- next_tail = (tail + sizeof(*cmd)) % CMD_BUFFER_SIZE;
- left = (head - next_tail) % CMD_BUFFER_SIZE;
+ left = (iommu->cmd_buf_head - next_tail) % CMD_BUFFER_SIZE;
if (left <= 0x20) {
- struct iommu_cmd sync_cmd;
- int ret;
-
- iommu->cmd_sem = 0;
+ /* Skip udelay() the first time around */
+ if (count++) {
+ if (count == LOOP_TIMEOUT) {
+ pr_err("AMD-Vi: Command buffer timeout\n");
+ return -EIO;
+ }
- build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
- copy_cmd_to_buffer(iommu, &sync_cmd, tail);
+ udelay(1);
+ }
- if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
- return ret;
+ /* Update head and recheck remaining space */
+ iommu->cmd_buf_head = readl(iommu->mmio_base +
+ MMIO_CMD_HEAD_OFFSET);
goto again;
}
- copy_cmd_to_buffer(iommu, cmd, tail);
+ copy_cmd_to_buffer(iommu, cmd);
- /* We need to sync now to make sure all commands are processed */
+ /* Do we need to make sure all commands are processed? */
iommu->need_sync = sync;
return 0;
@@ -1735,6 +1788,180 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
+static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct flush_queue *queue;
+
+ queue = per_cpu_ptr(dom->flush_queue, cpu);
+ kfree(queue->entries);
+ }
+
+ free_percpu(dom->flush_queue);
+
+ dom->flush_queue = NULL;
+}
+
+static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
+{
+ int cpu;
+
+ atomic64_set(&dom->flush_start_cnt, 0);
+ atomic64_set(&dom->flush_finish_cnt, 0);
+
+ dom->flush_queue = alloc_percpu(struct flush_queue);
+ if (!dom->flush_queue)
+ return -ENOMEM;
+
+ /* First make sure everything is cleared */
+ for_each_possible_cpu(cpu) {
+ struct flush_queue *queue;
+
+ queue = per_cpu_ptr(dom->flush_queue, cpu);
+ queue->head = 0;
+ queue->tail = 0;
+ queue->entries = NULL;
+ }
+
+ /* Now start doing the allocation */
+ for_each_possible_cpu(cpu) {
+ struct flush_queue *queue;
+
+ queue = per_cpu_ptr(dom->flush_queue, cpu);
+ queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
+ GFP_KERNEL);
+ if (!queue->entries) {
+ dma_ops_domain_free_flush_queue(dom);
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&queue->lock);
+ }
+
+ return 0;
+}
+
+static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
+{
+ atomic64_inc(&dom->flush_start_cnt);
+ domain_flush_tlb(&dom->domain);
+ domain_flush_complete(&dom->domain);
+ atomic64_inc(&dom->flush_finish_cnt);
+}
+
+static inline bool queue_ring_full(struct flush_queue *queue)
+{
+ assert_spin_locked(&queue->lock);
+
+ return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
+}
+
+#define queue_ring_for_each(i, q) \
+ for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
+
+static inline unsigned queue_ring_add(struct flush_queue *queue)
+{
+ unsigned idx = queue->tail;
+
+ assert_spin_locked(&queue->lock);
+ queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
+
+ return idx;
+}
+
+static inline void queue_ring_remove_head(struct flush_queue *queue)
+{
+ assert_spin_locked(&queue->lock);
+ queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
+}
+
+static void queue_ring_free_flushed(struct dma_ops_domain *dom,
+ struct flush_queue *queue)
+{
+ u64 counter = atomic64_read(&dom->flush_finish_cnt);
+ int idx;
+
+ queue_ring_for_each(idx, queue) {
+ /*
+ * This assumes that counter values in the ring-buffer are
+ * monotonously rising.
+ */
+ if (queue->entries[idx].counter >= counter)
+ break;
+
+ free_iova_fast(&dom->iovad,
+ queue->entries[idx].iova_pfn,
+ queue->entries[idx].pages);
+
+ queue_ring_remove_head(queue);
+ }
+}
+
+static void queue_add(struct dma_ops_domain *dom,
+ unsigned long address, unsigned long pages)
+{
+ struct flush_queue *queue;
+ unsigned long flags;
+ int idx;
+
+ pages = __roundup_pow_of_two(pages);
+ address >>= PAGE_SHIFT;
+
+ queue = get_cpu_ptr(dom->flush_queue);
+ spin_lock_irqsave(&queue->lock, flags);
+
+ /*
+	 * First remove the entries from the ring-buffer that are already
+ * flushed to make the below queue_ring_full() check less likely
+ */
+ queue_ring_free_flushed(dom, queue);
+
+ /*
+ * When ring-queue is full, flush the entries from the IOTLB so
+ * that we can free all entries with queue_ring_free_flushed()
+ * below.
+ */
+ if (queue_ring_full(queue)) {
+ dma_ops_domain_flush_tlb(dom);
+ queue_ring_free_flushed(dom, queue);
+ }
+
+ idx = queue_ring_add(queue);
+
+ queue->entries[idx].iova_pfn = address;
+ queue->entries[idx].pages = pages;
+ queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt);
+
+ spin_unlock_irqrestore(&queue->lock, flags);
+
+ if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
+ mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
+
+ put_cpu_ptr(dom->flush_queue);
+}
+
+static void queue_flush_timeout(unsigned long data)
+{
+ struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
+ int cpu;
+
+ atomic_set(&dom->flush_timer_on, 0);
+
+ dma_ops_domain_flush_tlb(dom);
+
+ for_each_possible_cpu(cpu) {
+ struct flush_queue *queue;
+ unsigned long flags;
+
+ queue = per_cpu_ptr(dom->flush_queue, cpu);
+ spin_lock_irqsave(&queue->lock, flags);
+ queue_ring_free_flushed(dom, queue);
+ spin_unlock_irqrestore(&queue->lock, flags);
+ }
+}
+
/*
* Free a domain, only used if something went wrong in the
* allocation path and we need to free an already allocated page table
@@ -1746,6 +1973,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
del_domain_from_list(&dom->domain);
+ if (timer_pending(&dom->flush_timer))
+ del_timer(&dom->flush_timer);
+
+ dma_ops_domain_free_flush_queue(dom);
+
put_iova_domain(&dom->iovad);
free_pagetable(&dom->domain);
@@ -1784,6 +2016,14 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
/* Initialize reserved ranges */
copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
+ if (dma_ops_domain_alloc_flush_queue(dma_dom))
+ goto free_dma_dom;
+
+ setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
+ (unsigned long)dma_dom);
+
+ atomic_set(&dma_dom->flush_timer_on, 0);
+
add_domain_to_list(&dma_dom->domain);
return dma_dom;
@@ -1846,7 +2086,8 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
flags |= tmp;
}
- flags &= ~(0xffffUL);
+
+ flags &= ~(DTE_FLAG_SA | 0xffffULL);
flags |= domain->id;
amd_iommu_dev_table[devid].data[1] = flags;
@@ -2227,92 +2468,6 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
*
*****************************************************************************/
-static void __queue_flush(struct flush_queue *queue)
-{
- struct protection_domain *domain;
- unsigned long flags;
- int idx;
-
- /* First flush TLB of all known domains */
- spin_lock_irqsave(&amd_iommu_pd_lock, flags);
- list_for_each_entry(domain, &amd_iommu_pd_list, list)
- domain_flush_tlb(domain);
- spin_unlock_irqrestore(&amd_iommu_pd_lock, flags);
-
- /* Wait until flushes have completed */
- domain_flush_complete(NULL);
-
- for (idx = 0; idx < queue->next; ++idx) {
- struct flush_queue_entry *entry;
-
- entry = queue->entries + idx;
-
- free_iova_fast(&entry->dma_dom->iovad,
- entry->iova_pfn,
- entry->pages);
-
- /* Not really necessary, just to make sure we catch any bugs */
- entry->dma_dom = NULL;
- }
-
- queue->next = 0;
-}
-
-static void queue_flush_all(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue;
- unsigned long flags;
-
- queue = per_cpu_ptr(&flush_queue, cpu);
- spin_lock_irqsave(&queue->lock, flags);
- if (queue->next > 0)
- __queue_flush(queue);
- spin_unlock_irqrestore(&queue->lock, flags);
- }
-}
-
-static void queue_flush_timeout(unsigned long unsused)
-{
- atomic_set(&queue_timer_on, 0);
- queue_flush_all();
-}
-
-static void queue_add(struct dma_ops_domain *dma_dom,
- unsigned long address, unsigned long pages)
-{
- struct flush_queue_entry *entry;
- struct flush_queue *queue;
- unsigned long flags;
- int idx;
-
- pages = __roundup_pow_of_two(pages);
- address >>= PAGE_SHIFT;
-
- queue = get_cpu_ptr(&flush_queue);
- spin_lock_irqsave(&queue->lock, flags);
-
- if (queue->next == FLUSH_QUEUE_SIZE)
- __queue_flush(queue);
-
- idx = queue->next++;
- entry = queue->entries + idx;
-
- entry->iova_pfn = address;
- entry->pages = pages;
- entry->dma_dom = dma_dom;
-
- spin_unlock_irqrestore(&queue->lock, flags);
-
- if (atomic_cmpxchg(&queue_timer_on, 0, 1) == 0)
- mod_timer(&queue_timer, jiffies + msecs_to_jiffies(10));
-
- put_cpu_ptr(&flush_queue);
-}
-
-
/*
* In the dma_ops path we only have the struct device. This function
* finds the corresponding IOMMU, the protection domain and the
@@ -2807,7 +2962,7 @@ static int init_reserved_iova_ranges(void)
int __init amd_iommu_init_api(void)
{
- int ret, cpu, err = 0;
+ int ret, err = 0;
ret = iova_cache_get();
if (ret)
@@ -2817,18 +2972,6 @@ int __init amd_iommu_init_api(void)
if (ret)
return ret;
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
- queue->entries = kzalloc(FLUSH_QUEUE_SIZE *
- sizeof(*queue->entries),
- GFP_KERNEL);
- if (!queue->entries)
- goto out_put_iova;
-
- spin_lock_init(&queue->lock);
- }
-
err = bus_set_iommu(&pci_bus_type, &amd_iommu_ops);
if (err)
return err;
@@ -2840,23 +2983,12 @@ int __init amd_iommu_init_api(void)
err = bus_set_iommu(&platform_bus_type, &amd_iommu_ops);
if (err)
return err;
- return 0;
-
-out_put_iova:
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue = per_cpu_ptr(&flush_queue, cpu);
-
- kfree(queue->entries);
- }
- return -ENOMEM;
+ return 0;
}
int __init amd_iommu_init_dma_ops(void)
{
- setup_timer(&queue_timer, queue_flush_timeout, 0);
- atomic_set(&queue_timer_on, 0);
-
swiotlb = iommu_pass_through ? 1 : 0;
iommu_detected = 1;
@@ -3012,12 +3144,6 @@ static void amd_iommu_domain_free(struct iommu_domain *dom)
switch (dom->type) {
case IOMMU_DOMAIN_DMA:
- /*
- * First make sure the domain is no longer referenced from the
- * flush queue
- */
- queue_flush_all();
-
/* Now release the domain */
dma_dom = to_dma_ops_domain(domain);
dma_ops_domain_free(dma_dom);
@@ -4281,7 +4407,7 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
irte_info->index);
}
-static struct irq_domain_ops amd_ir_domain_ops = {
+static const struct irq_domain_ops amd_ir_domain_ops = {
.alloc = irq_remapping_alloc,
.free = irq_remapping_free,
.activate = irq_remapping_activate,
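
The flush_start_cnt/flush_finish_cnt comments added to amd_iommu.c above describe the deferred-flush scheme in prose. Below is a small standalone sketch of the same two-counter idea in plain C11 (single-threaded, userspace, illustrative names only; it is not the kernel code and it leaves out the per-CPU queues, locking and flush timer).

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8

struct entry {
	uint64_t counter;	/* flush_start value when the entry was queued */
	unsigned long iova_pfn;
};

static struct entry ring[RING_SIZE];
static unsigned head, tail;
static atomic_uint_fast64_t flush_start, flush_finish;

static void flush_tlb(void)
{
	atomic_fetch_add(&flush_start, 1);
	/* ... the real code issues the IOTLB flush and waits for it here ... */
	atomic_fetch_add(&flush_finish, 1);
}

/* Free every entry whose counter proves it was queued before the last flush. */
static void free_flushed(void)
{
	uint64_t done = atomic_load(&flush_finish);

	while (head != tail && ring[head].counter < done) {
		printf("IOVA pfn %lu is flushed, freeing it\n", ring[head].iova_pfn);
		head = (head + 1) % RING_SIZE;
	}
}

static void queue_add(unsigned long iova_pfn)
{
	free_flushed();				/* reclaim what is already flushed */

	if ((tail + 1) % RING_SIZE == head) {	/* ring full: flush synchronously */
		flush_tlb();
		free_flushed();
	}

	/*
	 * Entries queued while a flush is in flight record the newer start
	 * counter, so they are not freed until the *next* flush completes.
	 */
	ring[tail].counter  = atomic_load(&flush_start);
	ring[tail].iova_pfn = iova_pfn;
	tail = (tail + 1) % RING_SIZE;
}

int main(void)
{
	for (unsigned long pfn = 0; pfn < 20; pfn++)
		queue_add(pfn);

	flush_tlb();		/* stands in for the periodic flush timer */
	free_flushed();
	return 0;
}

The driver itself keeps one such ring per CPU per dma_ops domain, protects each ring with a spinlock, and arms a 10ms timer instead of the final flush_tlb() call above.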
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 5a11328f4d98..5cc597b383c7 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -29,6 +29,7 @@
#include <linux/export.h>
#include <linux/iommu.h>
#include <linux/kmemleak.h>
+#include <linux/crash_dump.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -236,6 +237,7 @@ enum iommu_init_state {
IOMMU_INITIALIZED,
IOMMU_NOT_FOUND,
IOMMU_INIT_ERROR,
+ IOMMU_CMDLINE_DISABLED,
};
/* Early ioapic and hpet maps from kernel command line */
@@ -588,6 +590,8 @@ void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu)
writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
+ iommu->cmd_buf_head = 0;
+ iommu->cmd_buf_tail = 0;
iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
}
@@ -1898,6 +1902,14 @@ static void init_device_table_dma(void)
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
+ /*
+ * In kdump kernels in-flight DMA from the old kernel might
+ * cause IO_PAGE_FAULTs. There are no reports that a kdump
+ * actually failed because of that, so just disable fault
+ * reporting in the hardware to get rid of the messages
+ */
+ if (is_kdump_kernel())
+ set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
}
}
@@ -2097,23 +2109,27 @@ static struct syscore_ops amd_iommu_syscore_ops = {
.resume = amd_iommu_resume,
};
-static void __init free_on_init_error(void)
+static void __init free_iommu_resources(void)
{
kmemleak_free(irq_lookup_table);
free_pages((unsigned long)irq_lookup_table,
get_order(rlookup_table_siz