summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 15:03:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2017-09-09 15:03:24 -0700
commit4dfc2788033d30dfccfd4268e06dd73ce2c654ed (patch)
treef6675b959f4aa12e64e68383874683c4cea46334 /drivers
parenta59e57da49f7c3f3de8cf4b7568a0c6c82f5b242 (diff)
parent47b59d8e40850a05370ee9198ea5e505d89489f1 (diff)
Merge tag 'iommu-updates-v4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu
Pull IOMMU updates from Joerg Roedel: "Slightly more changes than usual this time: - KDump Kernel IOMMU take-over code for AMD IOMMU. The code now tries to preserve the mappings of the kernel so that master aborts for devices are avoided. Master aborts cause some devices to fail in the kdump kernel, so this code makes the dump more likely to succeed when AMD IOMMU is enabled. - common flush queue implementation for IOVA code users. The code is still optional, but AMD and Intel IOMMU drivers had their own implementation which is now unified. - finish support for iommu-groups. All drivers implement this feature now so that IOMMU core code can rely on it. - finish support for 'struct iommu_device' in iommu drivers. All drivers now use the interface. - new functions in the IOMMU-API for explicit IO/TLB flushing. This will help to reduce the number of IO/TLB flushes when IOMMU drivers support this interface. - support for mt2712 in the Mediatek IOMMU driver - new IOMMU driver for QCOM hardware - system PM support for ARM-SMMU - shutdown method for ARM-SMMU-v3 - some constification patches - various other small improvements and fixes" * tag 'iommu-updates-v4.14' of git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu: (87 commits) iommu/vt-d: Don't be too aggressive when clearing one context entry iommu: Introduce Interface for IOMMU TLB Flushing iommu/s390: Constify iommu_ops iommu/vt-d: Avoid calling virt_to_phys() on null pointer iommu/vt-d: IOMMU Page Request needs to check if address is canonical. arm/tegra: Call bus_set_iommu() after iommu_device_register() iommu/exynos: Constify iommu_ops iommu/ipmmu-vmsa: Make ipmmu_gather_ops const iommu/ipmmu-vmsa: Rereserving a free context before setting up a pagetable iommu/amd: Rename a few flush functions iommu/amd: Check if domain is NULL in get_domain() and return -EBUSY iommu/mediatek: Fix a build warning of BIT(32) in ARM iommu/mediatek: Fix a build fail of m4u_type iommu: qcom: annotate PM functions as __maybe_unused iommu/pamu: Fix PAMU boot crash memory: mtk-smi: Degrade SMI init to module_init iommu/mediatek: Enlarge the validate PA range for 4GB mode iommu/mediatek: Disable iommu clock when system suspend iommu/mediatek: Move pgtable allocation into domain_alloc iommu/mediatek: Merge 2 M4U HWs into one iommu domain ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/iommu/Kconfig13
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd_iommu.c313
-rw-r--r--drivers/iommu/amd_iommu_init.c223
-rw-r--r--drivers/iommu/amd_iommu_proto.h2
-rw-r--r--drivers/iommu/amd_iommu_types.h55
-rw-r--r--drivers/iommu/amd_iommu_v2.c18
-rw-r--r--drivers/iommu/arm-smmu-regs.h220
-rw-r--r--drivers/iommu/arm-smmu-v3.c7
-rw-r--r--drivers/iommu/arm-smmu.c384
-rw-r--r--drivers/iommu/dmar.c2
-rw-r--r--drivers/iommu/exynos-iommu.c44
-rw-r--r--drivers/iommu/fsl_pamu.c27
-rw-r--r--drivers/iommu/fsl_pamu_domain.c28
-rw-r--r--drivers/iommu/intel-iommu.c280
-rw-r--r--drivers/iommu/intel-svm.c14
-rw-r--r--drivers/iommu/iommu.c59
-rw-r--r--drivers/iommu/iova.c183
-rw-r--r--drivers/iommu/ipmmu-vmsa.c242
-rw-r--r--drivers/iommu/msm_iommu.c15
-rw-r--r--drivers/iommu/mtk_iommu.c214
-rw-r--r--drivers/iommu/mtk_iommu.h9
-rw-r--r--drivers/iommu/of_iommu.c144
-rw-r--r--drivers/iommu/omap-iommu.c125
-rw-r--r--drivers/iommu/omap-iommu.h1
-rw-r--r--drivers/iommu/qcom_iommu.c930
-rw-r--r--drivers/iommu/rockchip-iommu.c52
-rw-r--r--drivers/iommu/s390-iommu.c37
-rw-r--r--drivers/iommu/tegra-gart.c45
-rw-r--r--drivers/iommu/tegra-smmu.c39
-rw-r--r--drivers/memory/mtk-smi.c96
31 files changed, 2598 insertions, 1224 deletions
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index f73ff28f77e2..49bd2ab8c507 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -76,6 +76,8 @@ config IOMMU_DMA
config FSL_PAMU
bool "Freescale IOMMU support"
+ depends on PCI
+ depends on PHYS_64BIT
depends on PPC_E500MC || (COMPILE_TEST && PPC)
select IOMMU_API
select GENERIC_ALLOCATOR
@@ -253,6 +255,7 @@ config TEGRA_IOMMU_SMMU
config EXYNOS_IOMMU
bool "Exynos IOMMU Support"
depends on ARCH_EXYNOS && MMU
+ depends on !CPU_BIG_ENDIAN # revisit driver if we can enable big-endian ptes
select IOMMU_API
select ARM_DMA_USE_IOMMU
help
@@ -367,4 +370,14 @@ config MTK_IOMMU_V1
if unsure, say N here.
+config QCOM_IOMMU
+ # Note: iommu drivers cannot (yet?) be built as modules
+ bool "Qualcomm IOMMU Support"
+ depends on ARCH_QCOM || COMPILE_TEST
+ select IOMMU_API
+ select IOMMU_IO_PGTABLE_LPAE
+ select ARM_DMA_USE_IOMMU
+ help
+ Support for IOMMU on certain Qualcomm SoCs.
+
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 195f7b997d8e..b910aea813a1 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -27,3 +27,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
+obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index 4ad7e5e31943..51f8215877f5 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -103,29 +103,6 @@ int amd_iommu_max_glx_val = -1;
static const struct dma_map_ops amd_iommu_dma_ops;
/*
- * This struct contains device specific data for the IOMMU
- */
-struct iommu_dev_data {
- struct list_head list; /* For domain->dev_list */
- struct list_head dev_data_list; /* For global dev_data_list */
- struct protection_domain *domain; /* Domain the device is bound to */
- u16 devid; /* PCI Device ID */
- u16 alias; /* Alias Device ID */
- bool iommu_v2; /* Device can make use of IOMMUv2 */
- bool passthrough; /* Device is identity mapped */
- struct {
- bool enabled;
- int qdep;
- } ats; /* ATS state */
- bool pri_tlp; /* PASID TLB required for
- PPR completions */
- u32 errata; /* Bitmap for errata to apply */
- bool use_vapic; /* Enable device to use vapic mode */
-
- struct ratelimit_state rs; /* Ratelimit IOPF messages */
-};
-
-/*
* general struct to manage commands send to an IOMMU
*/
struct iommu_cmd {
@@ -137,20 +114,7 @@ struct kmem_cache *amd_iommu_irq_cache;
static void update_domain(struct protection_domain *domain);
static int protection_domain_init(struct protection_domain *domain);
static void detach_device(struct device *dev);
-
-#define FLUSH_QUEUE_SIZE 256
-
-struct flush_queue_entry {
- unsigned long iova_pfn;
- unsigned long pages;
- u64 counter; /* Flush counter when this entry was added to the queue */
-};
-
-struct flush_queue {
- struct flush_queue_entry *entries;
- unsigned head, tail;
- spinlock_t lock;
-};
+static void iova_domain_flush_tlb(struct iova_domain *iovad);
/*
* Data container for a dma_ops specific protection domain
@@ -161,36 +125,6 @@ struct dma_ops_domain {
/* IOVA RB-Tree */
struct iova_domain iovad;
-
- struct flush_queue __percpu *flush_queue;
-
- /*
- * We need two counter here to be race-free wrt. IOTLB flushing and
- * adding entries to the flush queue.
- *
- * The flush_start_cnt is incremented _before_ the IOTLB flush starts.
- * New entries added to the flush ring-buffer get their 'counter' value
- * from here. This way we can make sure that entries added to the queue
- * (or other per-cpu queues of the same domain) while the TLB is about
- * to be flushed are not considered to be flushed already.
- */
- atomic64_t flush_start_cnt;
-
- /*
- * The flush_finish_cnt is incremented when an IOTLB flush is complete.
- * This value is always smaller than flush_start_cnt. The queue_add
- * function frees all IOVAs that have a counter value smaller than
- * flush_finish_cnt. This makes sure that we only free IOVAs that are
- * flushed out of the IOTLB of the domain.
- */
- atomic64_t flush_finish_cnt;
-
- /*
- * Timer to make sure we don't keep IOVAs around unflushed
- * for too long
- */
- struct timer_list flush_timer;
- atomic_t flush_timer_on;
};
static struct iova_domain reserved_iova_ranges;
@@ -371,19 +305,25 @@ static u16 get_alias(struct device *dev)
static struct iommu_dev_data *find_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
+ struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
dev_data = search_dev_data(devid);
- if (dev_data == NULL)
+ if (dev_data == NULL) {
dev_data = alloc_dev_data(devid);
+ if (translation_pre_enabled(iommu))
+ dev_data->defer_attach = true;
+ }
+
return dev_data;
}
-static struct iommu_dev_data *get_dev_data(struct device *dev)
+struct iommu_dev_data *get_dev_data(struct device *dev)
{
return dev->archdata.iommu;
}
+EXPORT_SYMBOL(get_dev_data);
/*
* Find or create an IOMMU group for a acpihid device.
@@ -1167,7 +1107,7 @@ static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
return iommu_queue_command(iommu, &cmd);
}
-static void iommu_flush_dte_all(struct amd_iommu *iommu)
+static void amd_iommu_flush_dte_all(struct amd_iommu *iommu)
{
u32 devid;
@@ -1181,7 +1121,7 @@ static void iommu_flush_dte_all(struct amd_iommu *iommu)
* This function uses heavy locking and may disable irqs for some time. But
* this is no issue because it is only called during resume.
*/
-static void iommu_flush_tlb_all(struct amd_iommu *iommu)
+static void amd_iommu_flush_tlb_all(struct amd_iommu *iommu)
{
u32 dom_id;
@@ -1195,7 +1135,7 @@ static void iommu_flush_tlb_all(struct amd_iommu *iommu)
iommu_completion_wait(iommu);
}
-static void iommu_flush_all(struct amd_iommu *iommu)
+static void amd_iommu_flush_all(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
@@ -1214,7 +1154,7 @@ static void iommu_flush_irt(struct amd_iommu *iommu, u16 devid)
iommu_queue_command(iommu, &cmd);
}
-static void iommu_flush_irt_all(struct amd_iommu *iommu)
+static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
{
u32 devid;
@@ -1227,11 +1167,11 @@ static void iommu_flush_irt_all(struct amd_iommu *iommu)
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
if (iommu_feature(iommu, FEATURE_IA)) {
- iommu_flush_all(iommu);
+ amd_iommu_flush_all(iommu);
} else {
- iommu_flush_dte_all(iommu);
- iommu_flush_irt_all(iommu);
- iommu_flush_tlb_all(iommu);
+ amd_iommu_flush_dte_all(iommu);
+ amd_iommu_flush_irt_all(iommu);
+ amd_iommu_flush_tlb_all(iommu);
}
}
@@ -1539,9 +1479,9 @@ static int iommu_map_page(struct protection_domain *dom,
if (count > 1) {
__pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_P | IOMMU_PTE_FC;
+ __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
} else
- __pte = __sme_set(phys_addr) | IOMMU_PTE_P | IOMMU_PTE_FC;
+ __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
if (prot & IOMMU_PROT_IR)
__pte |= IOMMU_PTE_IR;
@@ -1790,178 +1730,19 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
-static void dma_ops_domain_free_flush_queue(struct dma_ops_domain *dom)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue;
-
- queue = per_cpu_ptr(dom->flush_queue, cpu);
- kfree(queue->entries);
- }
-
- free_percpu(dom->flush_queue);
-
- dom->flush_queue = NULL;
-}
-
-static int dma_ops_domain_alloc_flush_queue(struct dma_ops_domain *dom)
-{
- int cpu;
-
- atomic64_set(&dom->flush_start_cnt, 0);
- atomic64_set(&dom->flush_finish_cnt, 0);
-
- dom->flush_queue = alloc_percpu(struct flush_queue);
- if (!dom->flush_queue)
- return -ENOMEM;
-
- /* First make sure everything is cleared */
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue;
-
- queue = per_cpu_ptr(dom->flush_queue, cpu);
- queue->head = 0;
- queue->tail = 0;
- queue->entries = NULL;
- }
-
- /* Now start doing the allocation */
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue;
-
- queue = per_cpu_ptr(dom->flush_queue, cpu);
- queue->entries = kzalloc(FLUSH_QUEUE_SIZE * sizeof(*queue->entries),
- GFP_KERNEL);
- if (!queue->entries) {
- dma_ops_domain_free_flush_queue(dom);
- return -ENOMEM;
- }
-
- spin_lock_init(&queue->lock);
- }
-
- return 0;
-}
-
static void dma_ops_domain_flush_tlb(struct dma_ops_domain *dom)
{
- atomic64_inc(&dom->flush_start_cnt);
domain_flush_tlb(&dom->domain);
domain_flush_complete(&dom->domain);
- atomic64_inc(&dom->flush_finish_cnt);
-}
-
-static inline bool queue_ring_full(struct flush_queue *queue)
-{
- assert_spin_locked(&queue->lock);
-
- return (((queue->tail + 1) % FLUSH_QUEUE_SIZE) == queue->head);
-}
-
-#define queue_ring_for_each(i, q) \
- for (i = (q)->head; i != (q)->tail; i = (i + 1) % FLUSH_QUEUE_SIZE)
-
-static inline unsigned queue_ring_add(struct flush_queue *queue)
-{
- unsigned idx = queue->tail;
-
- assert_spin_locked(&queue->lock);
- queue->tail = (idx + 1) % FLUSH_QUEUE_SIZE;
-
- return idx;
-}
-
-static inline void queue_ring_remove_head(struct flush_queue *queue)
-{
- assert_spin_locked(&queue->lock);
- queue->head = (queue->head + 1) % FLUSH_QUEUE_SIZE;
-}
-
-static void queue_ring_free_flushed(struct dma_ops_domain *dom,
- struct flush_queue *queue)
-{
- u64 counter = atomic64_read(&dom->flush_finish_cnt);
- int idx;
-
- queue_ring_for_each(idx, queue) {
- /*
- * This assumes that counter values in the ring-buffer are
- * monotonously rising.
- */
- if (queue->entries[idx].counter >= counter)
- break;
-
- free_iova_fast(&dom->iovad,
- queue->entries[idx].iova_pfn,
- queue->entries[idx].pages);
-
- queue_ring_remove_head(queue);
- }
-}
-
-static void queue_add(struct dma_ops_domain *dom,
- unsigned long address, unsigned long pages)
-{
- struct flush_queue *queue;
- unsigned long flags;
- int idx;
-
- pages = __roundup_pow_of_two(pages);
- address >>= PAGE_SHIFT;
-
- queue = get_cpu_ptr(dom->flush_queue);
- spin_lock_irqsave(&queue->lock, flags);
-
- /*
- * First remove the enries from the ring-buffer that are already
- * flushed to make the below queue_ring_full() check less likely
- */
- queue_ring_free_flushed(dom, queue);
-
- /*
- * When ring-queue is full, flush the entries from the IOTLB so
- * that we can free all entries with queue_ring_free_flushed()
- * below.
- */
- if (queue_ring_full(queue)) {
- dma_ops_domain_flush_tlb(dom);
- queue_ring_free_flushed(dom, queue);
- }
-
- idx = queue_ring_add(queue);
-
- queue->entries[idx].iova_pfn = address;
- queue->entries[idx].pages = pages;
- queue->entries[idx].counter = atomic64_read(&dom->flush_start_cnt);
-
- spin_unlock_irqrestore(&queue->lock, flags);
-
- if (atomic_cmpxchg(&dom->flush_timer_on, 0, 1) == 0)
- mod_timer(&dom->flush_timer, jiffies + msecs_to_jiffies(10));
-
- put_cpu_ptr(dom->flush_queue);
}
-static void queue_flush_timeout(unsigned long data)
+static void iova_domain_flush_tlb(struct iova_domain *iovad)
{
- struct dma_ops_domain *dom = (struct dma_ops_domain *)data;
- int cpu;
+ struct dma_ops_domain *dom;
- atomic_set(&dom->flush_timer_on, 0);
+ dom = container_of(iovad, struct dma_ops_domain, iovad);
dma_ops_domain_flush_tlb(dom);
-
- for_each_possible_cpu(cpu) {
- struct flush_queue *queue;
- unsigned long flags;
-
- queue = per_cpu_ptr(dom->flush_queue, cpu);
- spin_lock_irqsave(&queue->lock, flags);
- queue_ring_free_flushed(dom, queue);
- spin_unlock_irqrestore(&queue->lock, flags);
- }
}
/*
@@ -1975,11 +1756,6 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
del_domain_from_list(&dom->domain);
- if (timer_pending(&dom->flush_timer))
- del_timer(&dom->flush_timer);
-
- dma_ops_domain_free_flush_queue(dom);
-
put_iova_domain(&dom->iovad);
free_pagetable(&dom->domain);
@@ -2015,16 +1791,11 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
init_iova_domain(&dma_dom->iovad, PAGE_SIZE,
IOVA_START_PFN, DMA_32BIT_PFN);
- /* Initialize reserved ranges */
- copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
-
- if (dma_ops_domain_alloc_flush_queue(dma_dom))
+ if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL))
goto free_dma_dom;
- setup_timer(&dma_dom->flush_timer, queue_flush_timeout,
- (unsigned long)dma_dom);
-
- atomic_set(&dma_dom->flush_timer_on, 0);
+ /* Initialize reserved ranges */
+ copy_reserved_iova(&reserved_iova_ranges, &dma_dom->iovad);
add_domain_to_list(&dma_dom->domain);
@@ -2055,7 +1826,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
- pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
+ pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
flags = amd_iommu_dev_table[devid].data[1];
@@ -2088,8 +1859,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
flags |= tmp;
}
-
- flags &= ~(DTE_FLAG_SA | 0xffffULL);
+ flags &= ~DEV_DOMID_MASK;
flags |= domain->id;
amd_iommu_dev_table[devid].data[1] = flags;
@@ -2099,7 +1869,7 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain, bool ats)
static void clear_dte_entry(u16 devid)
{
/* remove entry from the device table seen by the hardware */
- amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+ amd_iommu_dev_table[devid].data[0] = DTE_FLAG_V | DTE_FLAG_TV;
amd_iommu_dev_table[devid].data[1] &= DTE_FLAG_MASK;
amd_iommu_apply_erratum_63(devid);
@@ -2480,11 +2250,21 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
static struct protection_domain *get_domain(struct device *dev)
{
struct protection_domain *domain;
+ struct iommu_domain *io_domain;
if (!check_device(dev))
return ERR_PTR(-EINVAL);
domain = get_dev_data(dev)->domain;
+ if (domain == NULL && get_dev_data(dev)->defer_attach) {
+ get_dev_data(dev)->defer_attach = false;
+ io_domain = iommu_get_domain_for_dev(dev);
+ domain = to_pdomain(io_domain);
+ attach_device(dev, domain);
+ }
+ if (domain == NULL)
+ return ERR_PTR(-EBUSY);
+
if (!dma_ops_domain(domain))
return ERR_PTR(-EBUSY);
@@ -2530,6 +2310,7 @@ static int dir2prot(enum dma_data_direction direction)
else
return 0;
}
+
/*
* This function contains common code for mapping of a physically
* contiguous memory region into DMA address space. It is used by all
@@ -2621,7 +2402,8 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
domain_flush_tlb(&dma_dom->domain);
domain_flush_complete(&dma_dom->domain);
} else {
- queue_add(dma_dom, dma_addr, pages);
+ pages = __roundup_pow_of_two(pages);
+ queue_iova(&dma_dom->iovad, dma_addr >> PAGE_SHIFT, pages, 0);
}
}
@@ -3375,6 +3157,13 @@ static void amd_iommu_apply_resv_region(struct device *dev,
WARN_ON_ONCE(reserve_iova(&dma_dom->iovad, start, end) == NULL);
}
+static bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev->archdata.iommu;
+ return dev_data->defer_attach;
+}
+
const struct iommu_ops amd_iommu_ops = {
.capable = amd_iommu_capable,
.domain_alloc = amd_iommu_domain_alloc,
@@ -3391,6 +3180,7 @@ const struct iommu_ops amd_iommu_ops = {
.get_resv_regions = amd_iommu_get_resv_regions,
.put_resv_regions = amd_iommu_put_resv_regions,
.apply_resv_region = amd_iommu_apply_resv_region,
+ .is_attach_deferred = amd_iommu_is_attach_deferred,
.pgsize_bitmap = AMD_IOMMU_PGSIZES,
};
@@ -3779,11 +3569,6 @@ EXPORT_SYMBOL(amd_iommu_device_info);
static struct irq_chip amd_ir_chip;
-#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
-#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
-#define DTE_IRQ_TABLE_LEN (8ULL << 1)
-#define DTE_IRQ_REMAP_ENABLE 1ULL
-
static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
{
u64 dte;
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 2292a6cece76..382de42b8359 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -29,7 +29,6 @@
#include <linux/export.h>
#include <linux/iommu.h>
#include <linux/kmemleak.h>
-#include <linux/crash_dump.h>
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
@@ -39,6 +38,7 @@
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
+#include <linux/crash_dump.h>
#include "amd_iommu_proto.h"
#include "amd_iommu_types.h"
#include "irq_remapping.h"
@@ -197,6 +197,11 @@ spinlock_t amd_iommu_pd_lock;
* page table root pointer.
*/
struct dev_table_entry *amd_iommu_dev_table;
+/*
+ * Pointer to a device table which the content of old device table
+ * will be copied to. It's only be used in kdump kernel.
+ */
+static struct dev_table_entry *old_dev_tbl_cpy;
/*
* The alias table is a driver specific data structure which contains the
@@ -210,6 +215,7 @@ u16 *amd_iommu_alias_table;
* for a specific device. It is also indexed by the PCI device id.
*/
struct amd_iommu **amd_iommu_rlookup_table;
+EXPORT_SYMBOL(amd_iommu_rlookup_table);
/*
* This table is used to find the irq remapping table for a given device id
@@ -259,6 +265,28 @@ static int amd_iommu_enable_interrupts(void);
static int __init iommu_go_to_state(enum iommu_init_state state);
static void init_device_table_dma(void);
+static bool amd_iommu_pre_enabled = true;
+
+bool translation_pre_enabled(struct amd_iommu *iommu)
+{
+ return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
+}
+EXPORT_SYMBOL(translation_pre_enabled);
+
+static void clear_translation_pre_enabled(struct amd_iommu *iommu)
+{
+ iommu->flags &= ~AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
+static void init_translation_status(struct amd_iommu *iommu)
+{
+ u32 ctrl;
+
+ ctrl = readl(iommu->mmio_base + MMIO_CONTROL_OFFSET);
+ if (ctrl & (1<<CONTROL_IOMMU_EN))
+ iommu->flags |= AMD_IOMMU_FLAG_TRANS_PRE_ENABLED;
+}
+
static inline void update_last_devid(u16 devid)
{
if (devid > amd_iommu_last_bdf)
@@ -616,6 +644,14 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu)
amd_iommu_reset_cmd_buffer(iommu);
}
+/*
+ * This function disables the command buffer
+ */
+static void iommu_disable_command_buffer(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_CMDBUF_EN);
+}
+
static void __init free_command_buffer(struct amd_iommu *iommu)
{
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
@@ -648,6 +684,14 @@ static void iommu_enable_event_buffer(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
}
+/*
+ * This function disables the event log buffer
+ */
+static void iommu_disable_event_buffer(struct amd_iommu *iommu)
+{
+ iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN);
+}
+
static void __init free_event_buffer(struct amd_iommu *iommu)
{
free_pages((unsigned long)iommu->evt_buf, get_order(EVT_BUFFER_SIZE));
@@ -809,6 +853,96 @@ static int get_dev_entry_bit(u16 devid, u8 bit)
}
+static bool copy_device_table(void)
+{
+ u64 int_ctl, int_tab_len, entry = 0, last_entry = 0;
+ struct dev_table_entry *old_devtb = NULL;
+ u32 lo, hi, devid, old_devtb_size;
+ phys_addr_t old_devtb_phys;
+ struct amd_iommu *iommu;
+ u16 dom_id, dte_v, irq_v;
+ gfp_t gfp_flag;
+ u64 tmp;
+
+ if (!amd_iommu_pre_enabled)
+ return false;
+
+ pr_warn("Translation is already enabled - trying to copy translation structures\n");
+ for_each_iommu(iommu) {
+ /* All IOMMUs should use the same device table with the same size */
+ lo = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET);
+ hi = readl(iommu->mmio_base + MMIO_DEV_TABLE_OFFSET + 4);
+ entry = (((u64) hi) << 32) + lo;
+ if (last_entry && last_entry != entry) {
+ pr_err("IOMMU:%d should use the same dev table as others!/n",
+ iommu->index);
+ return false;
+ }
+ last_entry = entry;
+
+ old_devtb_size = ((entry & ~PAGE_MASK) + 1) << 12;
+ if (old_devtb_size != dev_table_size) {
+ pr_err("The device table size of IOMMU:%d is not expected!/n",
+ iommu->index);
+ return false;
+ }
+ }
+
+ old_devtb_phys = entry & PAGE_MASK;
+ if (old_devtb_phys >= 0x100000000ULL) {
+ pr_err("The address of old device table is above 4G, not trustworthy!/n");
+ return false;
+ }
+ old_devtb = memremap(old_devtb_phys, dev_table_size, MEMREMAP_WB);
+ if (!old_devtb)
+ return false;
+
+ gfp_flag = GFP_KERNEL | __GFP_ZERO | GFP_DMA32;
+ old_dev_tbl_cpy = (void *)__get_free_pages(gfp_flag,
+ get_order(dev_table_size));
+ if (old_dev_tbl_cpy == NULL) {
+ pr_err("Failed to allocate memory for copying old device table!/n");
+ return false;
+ }
+
+ for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+ old_dev_tbl_cpy[devid] = old_devtb[devid];
+ dom_id = old_devtb[devid].data[1] & DEV_DOMID_MASK;
+ dte_v = old_devtb[devid].data[0] & DTE_FLAG_V;
+
+ if (dte_v && dom_id) {
+ old_dev_tbl_cpy[devid].data[0] = old_devtb[devid].data[0];
+ old_dev_tbl_cpy[devid].data[1] = old_devtb[devid].data[1];
+ __set_bit(dom_id, amd_iommu_pd_alloc_bitmap);
+ /* If gcr3 table existed, mask it out */
+ if (old_devtb[devid].data[0] & DTE_FLAG_GV) {
+ tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+ tmp |= DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+ old_dev_tbl_cpy[devid].data[1] &= ~tmp;
+ tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
+ tmp |= DTE_FLAG_GV;
+ old_dev_tbl_cpy[devid].data[0] &= ~tmp;
+ }
+ }
+
+ irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
+ int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
+ int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
+ if (irq_v && (int_ctl || int_tab_len)) {
+ if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
+ (int_tab_len != DTE_IRQ_TABLE_LEN)) {
+ pr_err("Wrong old irq remapping flag: %#x\n", devid);
+ return false;
+ }
+
+ old_dev_tbl_cpy[devid].data[2] = old_devtb[devid].data[2];
+ }
+ }
+ memunmap(old_devtb);
+
+ return true;
+}
+
void amd_iommu_apply_erratum_63(u16 devid)
{
int sysmgt;
@@ -1400,6 +1534,16 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
iommu->int_enabled = false;
+ init_translation_status(iommu);
+ if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
+ iommu_disable(iommu);
+ clear_translation_pre_enabled(iommu);
+ pr_warn("Translation was enabled for IOMMU:%d but we are not in kdump mode\n",
+ iommu->index);
+ }
+ if (amd_iommu_pre_enabled)
+ amd_iommu_pre_enabled = translation_pre_enabled(iommu);
+
ret = init_iommu_from_acpi(iommu, h);
if (ret)
return ret;
@@ -1893,8 +2037,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table)
}
/*
- * Init the device table to not allow DMA access for devices and
- * suppress all page faults
+ * Init the device table to not allow DMA access for devices
*/
static void init_device_table_dma(void)
{
@@ -1903,14 +2046,6 @@ static void init_device_table_dma(void)
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
set_dev_entry_bit(devid, DEV_ENTRY_VALID);
set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION);
- /*
- * In kdump kernels in-flight DMA from the old kernel might
- * cause IO_PAGE_FAULTs. There are no reports that a kdump
- * actually failed because of that, so just disable fault
- * reporting in the hardware to get rid of the messages
- */
- if (is_kdump_kernel())
- set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT);
}
}
@@ -2023,24 +2158,62 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
#endif
}
+static void early_enable_iommu(struct amd_iommu *iommu)
+{
+ iommu_disable(iommu);
+ iommu_init_flags(iommu);
+ iommu_set_device_table(iommu);
+ iommu_enable_command_buffer(iommu);
+ iommu_enable_event_buffer(iommu);
+ iommu_set_exclusion_range(iommu);
+ iommu_enable_ga(iommu);
+ iommu_enable(iommu);
+ iommu_flush_all_caches(iommu);
+}
+
/*
* This function finally enables all IOMMUs found in the system after
- * they have been initialized
+ * they have been initialized.
+ *
+ * Or if in kdump kernel and IOMMUs are all pre-enabled, try to copy
+ * the old content of device table entries. Not this case or copy failed,
+ * just continue as normal kernel does.
*/
static void early_enable_iommus(void)
{
struct amd_iommu *iommu;
- for_each_iommu(iommu) {
- iommu_disable(iommu);
- iommu_init_flags(iommu);
- iommu_set_device_table(iommu);
- iommu_enable_command_buffer(iommu);
- iommu_enable_event_buffer(iommu);