summaryrefslogtreecommitdiffstats
path: root/drivers/iommu
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-22 17:59:09 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-22 17:59:09 -0700
commitd70b3ef54ceaf1c7c92209f5a662a670d04cbed9 (patch)
tree0f38109c1cabe9e2df028041c1e30f36c803ec5b /drivers/iommu
parent650ec5a6bd5df4ab0c9ef38d05b94cd82fb99ad8 (diff)
parent7ef3d7d58d9dc73ee3d4f8f56d0024c8cca8163f (diff)
Merge branch 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 core updates from Ingo Molnar: "There were so many changes in the x86/asm, x86/apic and x86/mm topics in this cycle that the topical separation of -tip broke down somewhat - so the result is a more traditional architecture pull request, collected into the 'x86/core' topic. The topics were still maintained separately as far as possible, so bisectability and conceptual separation should still be pretty good - but there were a handful of merge points to avoid excessive dependencies (and conflicts) that would have been poorly tested in the end. The next cycle will hopefully be much more quiet (or at least will have fewer dependencies). The main changes in this cycle were: * x86/apic changes, with related IRQ core changes: (Jiang Liu, Thomas Gleixner) - This is the second and most intrusive part of changes to the x86 interrupt handling - full conversion to hierarchical interrupt domains: [IOAPIC domain] ----- | [MSI domain] --------[Remapping domain] ----- [ Vector domain ] | (optional) | [HPET MSI domain] ----- | | [DMAR domain] ----------------------------- | [Legacy domain] ----------------------------- This now reflects the actual hardware and allowed us to distangle the domain specific code from the underlying parent domain, which can be optional in the case of interrupt remapping. It's a clear separation of functionality and removes quite some duct tape constructs which plugged the remap code between ioapic/msi/hpet and the vector management. - Intel IOMMU IRQ remapping enhancements, to allow direct interrupt injection into guests (Feng Wu) * x86/asm changes: - Tons of cleanups and small speedups, micro-optimizations. This is in preparation to move a good chunk of the low level entry code from assembly to C code (Denys Vlasenko, Andy Lutomirski, Brian Gerst) - Moved all system entry related code to a new home under arch/x86/entry/ (Ingo Molnar) - Removal of the fragile and ugly CFI dwarf debuginfo annotations. Conversion to C will reintroduce many of them - but meanwhile they are only getting in the way, and the upstream kernel does not rely on them (Ingo Molnar) - NOP handling refinements. (Borislav Petkov) * x86/mm changes: - Big PAT and MTRR rework: making the code more robust and preparing to phase out exposing direct MTRR interfaces to drivers - in favor of using PAT driven interfaces (Toshi Kani, Luis R Rodriguez, Borislav Petkov) - New ioremap_wt()/set_memory_wt() interfaces to support Write-Through cached memory mappings. This is especially important for good performance on NVDIMM hardware (Toshi Kani) * x86/ras changes: - Add support for deferred errors on AMD (Aravind Gopalakrishnan) This is an important RAS feature which adds hardware support for poisoned data. That means roughly that the hardware marks data which it has detected as corrupted but wasn't able to correct, as poisoned data and raises an APIC interrupt to signal that in the form of a deferred error. It is the OS's responsibility then to take proper recovery action and thus prolonge system lifetime as far as possible. - Add support for Intel "Local MCE"s: upcoming CPUs will support CPU-local MCE interrupts, as opposed to the traditional system- wide broadcasted MCE interrupts (Ashok Raj) - Misc cleanups (Borislav Petkov) * x86/platform changes: - Intel Atom SoC updates ... and lots of other cleanups, fixlets and other changes - see the shortlog and the Git log for details" * 'x86-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (222 commits) x86/hpet: Use proper hpet device number for MSI allocation x86/hpet: Check for irq==0 when allocating hpet MSI interrupts x86/mm/pat, drivers/infiniband/ipath: Use arch_phys_wc_add() and require PAT disabled x86/mm/pat, drivers/media/ivtv: Use arch_phys_wc_add() and require PAT disabled x86/platform/intel/baytrail: Add comments about why we disabled HPET on Baytrail genirq: Prevent crash in irq_move_irq() genirq: Enhance irq_data_to_desc() to support hierarchy irqdomain iommu, x86: Properly handle posted interrupts for IOMMU hotplug iommu, x86: Provide irq_remapping_cap() interface iommu, x86: Setup Posted-Interrupts capability for Intel iommu iommu, x86: Add cap_pi_support() to detect VT-d PI capability iommu, x86: Avoid migrating VT-d posted interrupts iommu, x86: Save the mode (posted or remapped) of an IRTE iommu, x86: Implement irq_set_vcpu_affinity for intel_ir_chip iommu: dmar: Provide helper to copy shared irte fields iommu: dmar: Extend struct irte for VT-d Posted-Interrupts iommu: Add new member capability to struct irq_remap_ops x86/asm/entry/64: Disentangle error_entry/exit gsbase/ebx/usermode code x86/asm/entry/32: Shorten __audit_syscall_entry() args preparation x86/asm/entry/32: Explain reloading of registers after __audit_syscall_entry() ...
Diffstat (limited to 'drivers/iommu')
-rw-r--r--drivers/iommu/amd_iommu.c477
-rw-r--r--drivers/iommu/amd_iommu_init.c4
-rw-r--r--drivers/iommu/amd_iommu_proto.h9
-rw-r--r--drivers/iommu/amd_iommu_types.h5
-rw-r--r--drivers/iommu/dmar.c19
-rw-r--r--drivers/iommu/intel_irq_remapping.c648
-rw-r--r--drivers/iommu/irq_remapping.c253
-rw-r--r--drivers/iommu/irq_remapping.h42
8 files changed, 711 insertions, 746 deletions
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e1c7e9e51045..fffea87a014f 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -34,6 +34,7 @@
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dma-contiguous.h>
+#include <linux/irqdomain.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
@@ -3852,6 +3853,21 @@ union irte {
} fields;
};
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
+struct amd_ir_data {
+ struct irq_2_irte irq_2_irte;
+ union irte irte_entry;
+ union {
+ struct msi_msg msi_entry;
+ };
+};
+
+static struct irq_chip amd_ir_chip;
+
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
#define DTE_IRQ_TABLE_LEN (8ULL << 1)
@@ -3945,7 +3961,7 @@ out_unlock:
return table;
}
-static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
+static int alloc_irq_index(u16 devid, int count)
{
struct irq_remap_table *table;
unsigned long flags;
@@ -3967,18 +3983,10 @@ static int alloc_irq_index(struct irq_cfg *cfg, u16 devid, int count)
c = 0;
if (c == count) {
- struct irq_2_irte *irte_info;
-
for (; c != 0; --c)
table->table[index - c + 1] = IRTE_ALLOCATED;
index -= count - 1;
-
- cfg->remapped = 1;
- irte_info = &cfg->irq_2_irte;
- irte_info->devid = devid;
- irte_info->index = index;
-
goto out;
}
}
@@ -3991,22 +3999,6 @@ out:
return index;
}
-static int get_irte(u16 devid, int index, union irte *irte)
-{
- struct irq_remap_table *table;
- unsigned long flags;
-
- table = get_irq_table(devid, false);
- if (!table)
- return -ENOMEM;
-
- spin_lock_irqsave(&table->lock, flags);
- irte->val = table->table[index];
- spin_unlock_irqrestore(&table->lock, flags);
-
- return 0;
-}
-
static int modify_irte(u16 devid, int index, union irte irte)
{
struct irq_remap_table *table;
@@ -4053,243 +4045,316 @@ static void free_irte(u16 devid, int index)
iommu_completion_wait(iommu);
}
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+static int get_devid(struct irq_alloc_info *info)
{
- struct irq_remap_table *table;
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- union irte irte;
- int ioapic_id;
- int index;
- int devid;
- int ret;
-
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
-
- irte_info = &cfg->irq_2_irte;
- ioapic_id = mpc_ioapic_id(attr->ioapic);
- devid = get_ioapic_devid(ioapic_id);
-
- if (devid < 0)
- return devid;
-
- table = get_irq_table(devid, true);
- if (table == NULL)
- return -ENOMEM;
-
- index = attr->ioapic_pin;
+ int devid = -1;
- /* Setup IRQ remapping info */
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index;
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ devid = get_ioapic_devid(info->ioapic_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ devid = get_hpet_devid(info->hpet_id);
+ break;
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ devid = get_device_id(&info->msi_dev->dev);
+ break;
+ default:
+ BUG_ON(1);
+ break;
+ }
- /* Setup IRTE for IOMMU */
- irte.val = 0;
- irte.fields.vector = vector;
- irte.fields.int_type = apic->irq_delivery_mode;
- irte.fields.destination = destination;
- irte.fields.dm = apic->irq_dest_mode;
- irte.fields.valid = 1;
-
- ret = modify_irte(devid, index, irte);
- if (ret)
- return ret;
+ return devid;
+}
- /* Setup IOAPIC entry */
- memset(entry, 0, sizeof(*entry));
+static struct irq_domain *get_ir_irq_domain(struct irq_alloc_info *info)
+{
+ struct amd_iommu *iommu;
+ int devid;
- entry->vector = index;
- entry->mask = 0;
- entry->trigger = attr->trigger;
- entry->polarity = attr->polarity;
+ if (!info)
+ return NULL;
- /*
- * Mask level triggered irqs.
- */
- if (attr->trigger)
- entry->mask = 1;
+ devid = get_devid(info);
+ if (devid >= 0) {
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu)
+ return iommu->ir_domain;
+ }
- return 0;
+ return NULL;
}
-static int set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
{
- struct irq_2_irte *irte_info;
- unsigned int dest, irq;
- struct irq_cfg *cfg;
- union irte irte;
- int err;
-
- if (!config_enabled(CONFIG_SMP))
- return -1;
-
- cfg = irqd_cfg(data);
- irq = data->irq;
- irte_info = &cfg->irq_2_irte;
+ struct amd_iommu *iommu;
+ int devid;
- if (!cpumask_intersects(mask, cpu_online_mask))
- return -EINVAL;
+ if (!info)
+ return NULL;
- if (get_irte(irte_info->devid, irte_info->index, &irte))
- return -EBUSY;
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ devid = get_device_id(&info->msi_dev->dev);
+ if (devid >= 0) {
+ iommu = amd_iommu_rlookup_table[devid];
+ if (iommu)
+ return iommu->msi_domain;
+ }
+ break;
+ default:
+ break;
+ }
- if (assign_irq_vector(irq, cfg, mask))
- return -EBUSY;
+ return NULL;
+}
- err = apic->cpu_mask_to_apicid_and(cfg->domain, mask, &dest);
- if (err) {
- if (assign_irq_vector(irq, cfg, data->affinity))
- pr_err("AMD-Vi: Failed to recover vector for irq %d\n", irq);
- return err;
- }
+struct irq_remap_ops amd_iommu_irq_ops = {
+ .prepare = amd_iommu_prepare,
+ .enable = amd_iommu_enable,
+ .disable = amd_iommu_disable,
+ .reenable = amd_iommu_reenable,
+ .enable_faulting = amd_iommu_enable_faulting,
+ .get_ir_irq_domain = get_ir_irq_domain,
+ .get_irq_domain = get_irq_domain,
+};
- irte.fields.vector = cfg->vector;
- irte.fields.destination = dest;
+static void irq_remapping_prepare_irte(struct amd_ir_data *data,
+ struct irq_cfg *irq_cfg,
+ struct irq_alloc_info *info,
+ int devid, int index, int sub_handle)
+{
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
+ struct msi_msg *msg = &data->msi_entry;
+ union irte *irte = &data->irte_entry;
+ struct IO_APIC_route_entry *entry;
- modify_irte(irte_info->devid, irte_info->index, irte);
+ data->irq_2_irte.devid = devid;
+ data->irq_2_irte.index = index + sub_handle;
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
+ /* Setup IRTE for IOMMU */
+ irte->val = 0;
+ irte->fields.vector = irq_cfg->vector;
+ irte->fields.int_type = apic->irq_delivery_mode;
+ irte->fields.destination = irq_cfg->dest_apicid;
+ irte->fields.dm = apic->irq_dest_mode;
+ irte->fields.valid = 1;
+
+ switch (info->type) {
+ case X86_IRQ_ALLOC_TYPE_IOAPIC:
+ /* Setup IOAPIC entry */
+ entry = info->ioapic_entry;
+ info->ioapic_entry = NULL;
+ memset(entry, 0, sizeof(*entry));
+ entry->vector = index;
+ entry->mask = 0;
+ entry->trigger = info->ioapic_trigger;
+ entry->polarity = info->ioapic_polarity;
+ /* Mask level triggered irqs. */
+ if (info->ioapic_trigger)
+ entry->mask = 1;
+ break;
- cpumask_copy(data->affinity, mask);
+ case X86_IRQ_ALLOC_TYPE_HPET:
+ case X86_IRQ_ALLOC_TYPE_MSI:
+ case X86_IRQ_ALLOC_TYPE_MSIX:
+ msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_lo = MSI_ADDR_BASE_LO;
+ msg->data = irte_info->index;
+ break;
- return 0;
+ default:
+ BUG_ON(1);
+ break;
+ }
}
-static int free_irq(int irq)
+static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
{
- struct irq_2_irte *irte_info;
+ struct irq_alloc_info *info = arg;
+ struct irq_data *irq_data;
+ struct amd_ir_data *data;
struct irq_cfg *cfg;
+ int i, ret, devid;
+ int index = -1;
- cfg = irq_cfg(irq);
- if (!cfg)
+ if (!info)
+ return -EINVAL;
+ if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_MSI &&
+ info->type != X86_IRQ_ALLOC_TYPE_MSIX)
return -EINVAL;
- irte_info = &cfg->irq_2_irte;
-
- free_irte(irte_info->devid, irte_info->index);
+ /*
+ * With IRQ remapping enabled, don't need contiguous CPU vectors
+ * to support multiple MSI interrupts.
+ */
+ if (info->type == X86_IRQ_ALLOC_TYPE_MSI)
+ info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS;
- return 0;
-}
+ devid = get_devid(info);
+ if (devid < 0)
+ return -EINVAL;
-static void compose_msi_msg(struct pci_dev *pdev,
- unsigned int irq, unsigned int dest,
- struct msi_msg *msg, u8 hpet_id)
-{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- union irte irte;
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
- cfg = irq_cfg(irq);
- if (!cfg)
- return;
+ ret = -ENOMEM;
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_free_parent;
- irte_info = &cfg->irq_2_irte;
+ if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) {
+ if (get_irq_table(devid, true))
+ index = info->ioapic_pin;
+ else
+ ret = -ENOMEM;
+ } else {
+ index = alloc_irq_index(devid, nr_irqs);
+ }
+ if (index < 0) {
+ pr_warn("Failed to allocate IRTE\n");
+ kfree(data);
+ goto out_free_parent;
+ }
- irte.val = 0;
- irte.fields.vector = cfg->vector;
- irte.fields.int_type = apic->irq_delivery_mode;
- irte.fields.destination = dest;
- irte.fields.dm = apic->irq_dest_mode;
- irte.fields.valid = 1;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ cfg = irqd_cfg(irq_data);
+ if (!irq_data || !cfg) {
+ ret = -EINVAL;
+ goto out_free_data;
+ }
- modify_irte(irte_info->devid, irte_info->index, irte);
+ if (i > 0) {
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out_free_data;
+ }
+ irq_data->hwirq = (devid << 16) + i;
+ irq_data->chip_data = data;
+ irq_data->chip = &amd_ir_chip;
+ irq_remapping_prepare_irte(data, cfg, info, devid, index, i);
+ irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
+ }
+ return 0;
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo = MSI_ADDR_BASE_LO;
- msg->data = irte_info->index;
+out_free_data:
+ for (i--; i >= 0; i--) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ if (irq_data)
+ kfree(irq_data->chip_data);
+ }
+ for (i = 0; i < nr_irqs; i++)
+ free_irte(devid, index + i);
+out_free_parent:
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+ return ret;
}
-static int msi_alloc_irq(struct pci_dev *pdev, int irq, int nvec)
+static void irq_remapping_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- struct irq_cfg *cfg;
- int index;
- u16 devid;
-
- if (!pdev)
- return -EINVAL;
+ struct irq_2_irte *irte_info;
+ struct irq_data *irq_data;
+ struct amd_ir_data *data;
+ int i;
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+ for (i = 0; i < nr_irqs; i++) {
+ irq_data = irq_domain_get_irq_data(domain, virq + i);
+ if (irq_data && irq_data->chip_data) {
+ data = irq_data->chip_data;
+ irte_info = &data->irq_2_irte;
+ free_irte(irte_info->devid, irte_info->index);
+ kfree(data);
+ }
+ }
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
- devid = get_device_id(&pdev->dev);
- index = alloc_irq_index(cfg, devid, nvec);
+static void irq_remapping_activate(struct irq_domain *domain,
+ struct irq_data *irq_data)
+{
+ struct amd_ir_data *data = irq_data->chip_data;
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
- return index < 0 ? MAX_IRQS_PER_TABLE : index;
+ modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
}
-static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int offset)
+static void irq_remapping_deactivate(struct irq_domain *domain,
+ struct irq_data *irq_data)
{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- u16 devid;
+ struct amd_ir_data *data = irq_data->chip_data;
+ struct irq_2_irte *irte_info = &data->irq_2_irte;
+ union irte entry;
- if (!pdev)
- return -EINVAL;
+ entry.val = 0;
+ modify_irte(irte_info->devid, irte_info->index, data->irte_entry);
+}
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+static struct irq_domain_ops amd_ir_domain_ops = {
+ .alloc = irq_remapping_alloc,
+ .free = irq_remapping_free,
+ .activate = irq_remapping_activate,
+ .deactivate = irq_remapping_deactivate,
+};
- if (index >= MAX_IRQS_PER_TABLE)
- return 0;
+static int amd_ir_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ struct amd_ir_data *ir_data = data->chip_data;
+ struct irq_2_irte *irte_info = &ir_data->irq_2_irte;
+ struct irq_cfg *cfg = irqd_cfg(data);
+ struct irq_data *parent = data->parent_data;
+ int ret;
- devid = get_device_id(&pdev->dev);
- irte_info = &cfg->irq_2_irte;
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index + offset;
+ /*
+ * Atomically updates the IRTE with the new destination, vector
+ * and flushes the interrupt entry cache.
+ */
+ ir_data->irte_entry.fields.vector = cfg->vector;
+ ir_data->irte_entry.fields.destination = cfg->dest_apicid;
+ modify_irte(irte_info->devid, irte_info->index, ir_data->irte_entry);
- return 0;
+ /*
+ * After this point, all the interrupts will start arriving
+ * at the new destination. So, time to cleanup the previous
+ * vector allocation.
+ */
+ send_cleanup_vector(cfg);
+
+ return IRQ_SET_MASK_OK_DONE;
}
-static int alloc_hpet_msi(unsigned int irq, unsigned int id)
+static void ir_compose_msi_msg(struct irq_data *irq_data, struct msi_msg *msg)
{
- struct irq_2_irte *irte_info;
- struct irq_cfg *cfg;
- int index, devid;
+ struct amd_ir_data *ir_data = irq_data->chip_data;
- cfg = irq_cfg(irq);
- if (!cfg)
- return -EINVAL;
+ *msg = ir_data->msi_entry;
+}
- irte_info = &cfg->irq_2_irte;
- devid = get_hpet_devid(id);
- if (devid < 0)
- return devid;
+static struct irq_chip amd_ir_chip = {
+ .irq_ack = ir_ack_apic_edge,
+ .irq_set_affinity = amd_ir_set_affinity,
+ .irq_compose_msi_msg = ir_compose_msi_msg,
+};
- index = alloc_irq_index(cfg, devid, 1);
- if (index < 0)
- return index;
+int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ iommu->ir_domain = irq_domain_add_tree(NULL, &amd_ir_domain_ops, iommu);
+ if (!iommu->ir_domain)
+ return -ENOMEM;
- cfg->remapped = 1;
- irte_info->devid = devid;
- irte_info->index = index;
+ iommu->ir_domain->parent = arch_get_ir_parent_domain();
+ iommu->msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
return 0;
}
-
-struct irq_remap_ops amd_iommu_irq_ops = {
- .prepare = amd_iommu_prepare,
- .enable = amd_iommu_enable,
- .disable = amd_iommu_disable,
- .reenable = amd_iommu_reenable,
- .enable_faulting = amd_iommu_enable_faulting,
- .setup_ioapic_entry = setup_ioapic_entry,
- .set_affinity = set_affinity,
- .free_irq = free_irq,
- .compose_msi_msg = compose_msi_msg,
- .msi_alloc_irq = msi_alloc_irq,
- .msi_setup_irq = msi_setup_irq,
- .alloc_hpet_msi = alloc_hpet_msi,
-};
#endif
diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c
index 450ef5001a65..c17df04d7a7f 100644
--- a/drivers/iommu/amd_iommu_init.c
+++ b/drivers/iommu/amd_iommu_init.c
@@ -1124,6 +1124,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (ret)
return ret;
+ ret = amd_iommu_create_irq_domain(iommu);
+ if (ret)
+ return ret;
+
/*
* Make sure IOMMU is not considered to translate itself. The IVRS
* table tells us so, but this is a lie!
diff --git a/drivers/iommu/amd_iommu_proto.h b/drivers/iommu/amd_iommu_proto.h
index 72b0fd455e24..0a21142d3639 100644
--- a/drivers/iommu/amd_iommu_proto.h
+++ b/drivers/iommu/amd_iommu_proto.h
@@ -62,6 +62,15 @@ extern u8 amd_iommu_pc_get_max_counters(u16 devid);
extern int amd_iommu_pc_get_set_reg_val(u16 devid, u8 bank, u8 cntr, u8 fxn,
u64 *value, bool is_write);
+#ifdef CONFIG_IRQ_REMAP
+extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
+#else
+static inline int amd_iommu_create_irq_domain(struct amd_iommu *iommu)
+{
+ return 0;
+}
+#endif
+
#define PPR_SUCCESS 0x0
#define PPR_INVALID 0x1
#define PPR_FAILURE 0xf
diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h
index 05030e523771..6533e874c9d7 100644
--- a/drivers/iommu/amd_iommu_types.h
+++ b/drivers/iommu/amd_iommu_types.h
@@ -398,6 +398,7 @@ struct amd_iommu_fault {
struct iommu_domain;
+struct irq_domain;
/*
* This structure contains generic data for IOMMU protection domains
@@ -579,6 +580,10 @@ struct amd_iommu {
/* The maximum PC banks and counters/bank (PCSup=1) */
u8 max_banks;
u8 max_counters;
+#ifdef CONFIG_IRQ_REMAP
+ struct irq_domain *ir_domain;
+ struct irq_domain *msi_domain;
+#endif
};
struct devid_map {
diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c
index 9847613085e1..536f2d8ea41a 100644
--- a/drivers/iommu/dmar.c
+++ b/drivers/iommu/dmar.c
@@ -1087,8 +1087,8 @@ static void free_iommu(struct intel_iommu *iommu)
if (iommu->irq) {
free_irq(iommu->irq, iommu);
- irq_set_handler_data(iommu->irq, NULL);
dmar_free_hwirq(iommu->irq);
+ iommu->irq = 0;
}
if (iommu->qi) {
@@ -1642,23 +1642,14 @@ int dmar_set_interrupt(struct intel_iommu *iommu)
if (iommu->irq)
return 0;
- irq = dmar_alloc_hwirq();
- if (irq <= 0) {
+ irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
+ if (irq > 0) {
+ iommu->irq = irq;
+ } else {
pr_err("IOMMU: no free vectors\n");
return -EINVAL;
}
- irq_set_handler_data(irq, iommu);
- iommu->irq = irq;
-
- ret = arch_setup_dmar_msi(irq);
- if (ret) {
- irq_set_handler_data(irq, NULL);
- iommu->irq = 0;
- dmar_free_hwirq(irq);
- return ret;
- }
-
ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
if (ret)
pr_err("IOMMU: can't request irq\n");
diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 5709ae9c3e77..80f1d1486247 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -8,6 +8,7 @@
#include <linux/irq.h>
#include <linux/intel-iommu.h>
#include <linux/acpi.h>
+#include <linux/irqdomain.h>
#include <asm/io_apic.h>
#include <asm/smp.h>
#include <asm/cpu.h>
@@ -17,6 +18,11 @@
#include "irq_remapping.h"
+enum irq_mode {
+ IRQ_REMAPPING,
+ IRQ_POSTING,
+};
+
struct ioapic_scope {
struct intel_iommu *iommu;
unsigned int id;
@@ -31,6 +37,22 @@ struct hpet_scope {
unsigned int devfn;
};
+struct irq_2_iommu {
+ struct intel_iommu *iommu;
+ u16 irte_index;
+ u16 sub_handle;
+ u8 irte_mask;
+ enum irq_mode mode;
+};
+
+struct intel_ir_data {
+ struct irq_2_iommu irq_2_iommu;
+ struct irte irte_entry;
+ union {
+ struct msi_msg msi_entry;
+ };
+};
+
#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
#define IRTE_DEST(dest) ((eim_mode) ? dest : dest << 8)
@@ -50,43 +72,14 @@ static struct hpet_scope ir_hpet[MAX_HPET_TBS];
* the dmar_global_lock.
*/
static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+static struct irq_domain_ops intel_ir_domain_ops;
static int __init parse_ioapics_under_ir(void);
-static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
-{
- struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? &cfg->irq_2_iommu : NULL;
-}
-
-static int get_irte(int irq, struct irte *entry)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int index;
-
- if (!entry || !irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- if (unlikely(!irq_iommu->iommu)) {
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return -1;
- }
-
- index = irq_iommu->irte_index + irq_iommu->sub_handle;
- *entry = *(irq_iommu->iommu->ir_table->base + index);
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return 0;
-}
-
-static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
+static int alloc_irte(struct intel_iommu *iommu, int irq,
+ struct irq_2_iommu *irq_iommu, u16 count)
{
struct ir_table *table = iommu->ir_table;
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- struct irq_cfg *cfg = irq_cfg(irq);
unsigned int mask = 0;
unsigned long flags;
int index;
@@ -113,11 +106,11 @@ static int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
if (index < 0) {
pr_warn("IR%d: can't allocate an IRTE\n", iommu->seq_id);
} else {
- cfg->remapped = 1;
irq_iommu->iommu = iommu;
irq_iommu->irte_index = index;
irq_iommu->sub_handle = 0;
irq_iommu->irte_mask = mask;
+ irq_iommu->mode = IRQ_REMAPPING;
}
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
@@ -135,47 +128,9 @@ static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
return qi_submit_sync(&desc, iommu);
}
-static int map_irq_to_irte_handle(int irq, u16 *sub_handle)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int index;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
- *sub_handle = irq_iommu->sub_handle;
- index = irq_iommu->irte_index;
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
- return index;
-}
-
-static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
+static int modify_irte(struct irq_2_iommu *irq_iommu,
+ struct irte *irte_modified)
{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- struct irq_cfg *cfg = irq_cfg(irq);
- unsigned long flags;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- cfg->remapped = 1;
- irq_iommu->iommu = iommu;
- irq_iommu->irte_index = index;
- irq_iommu->sub_handle = subhandle;
- irq_iommu->irte_mask = 0;
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
- return 0;
-}
-
-static int modify_irte(int irq, struct irte *irte_modified)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
struct intel_iommu *iommu;
unsigned long flags;
struct irte *irte;
@@ -196,6 +151,9 @@ static int modify_irte(int irq, struct irte *irte_modified)
__iommu_flush_cache(iommu, irte, sizeof(*irte));
rc = qi_flush_iec(iommu, index, 0);
+
+ /* Update iommu mode according to the IRTE mode */
+ irq_iommu->mode = irte->pst ? IRQ_POSTING : IRQ_REMAPPING;
raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
return rc;
@@ -242,7 +200,7 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
return 0;
iommu = irq_iommu->iommu;
- index = irq_iommu->irte_index + irq_iommu->sub_handle;
+ index = irq_iommu->irte_index;
start = iommu->ir_table->base + index;
end = start + (1 << irq_iommu->irte_mask);
@@ -257,29 +215,6 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
return qi_flush_iec(iommu, index, irq_iommu->irte_mask);
}
-static int free_irte(int irq)
-{
- struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
- unsigned long flags;
- int rc;
-
- if (!irq_iommu)
- return -1;
-
- raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
-
- rc = clear_entries(irq_iommu);
-
- irq_iommu->iommu = NULL;
- irq_iommu->irte_index = 0;
- irq_iommu->sub_handle = 0;
- irq_iommu->irte_mask = 0;
-
- raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
-
- return rc;
-}
-
/*
* source validation type
*/
@@ -488,7 +423,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO,
INTR_REMAP_PAGE_ORDER);
-
if (!pages) {
pr_err("IR%d: failed to allocate pages of order %d\n",
iommu->seq_id, INTR_REMAP_PAGE_ORDER);
@@ -502,11 +436,23 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu)
goto out_free_pages;
}
+ iommu->ir_domain = irq_domain_add_hierarchy(arch_get_ir_parent_domain(),
+ 0, INTR_REMAP_TABLE_ENTRIES,
+ NULL, &intel_ir_domain_ops,
+ iommu);
+ if (!iommu->ir_domain) {
+ pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id);
+ goto out_free_bitmap;
+ }
+ iommu->ir_msi_domain = arch_create_msi_irq_domain(iommu->ir_domain);
+
ir_table->base = page_address(pages);
ir_table->bitmap = bitmap;
iommu->ir_table = ir_table;
return 0;
+out_free_bitmap:
+ kfree(bitmap);
out_free_pages:
__free_pages(pages, INTR_REMAP_PAGE_ORDER);
out_free_table:
@@ -517,6 +463,14 @@ out_free_table:
static void intel_teardown_irq_remapping(struct intel_iommu *iommu)
{
if (iommu && iommu->ir_table) {
+ if (iommu->ir_msi_domain) {
+ irq_domain_remove(iommu->ir_msi_domain);
+ iommu->ir_msi_domain = NULL;
+ }
+ if (iommu->ir_domain) {
+ irq_domain_remove(iommu->ir_domain);
+ iommu->ir_domain = NULL;
+ }
free_pages((unsigned long)iommu->ir_table->base,
INTR_REMAP_PAGE_ORDER);
kfree(iommu->ir_table->bitmap);
@@ -627,6 +581,26 @@ error:
return -ENODEV;
}
+/*
+ * Set Posted-Interrupts capability.
+ */
+static inline void set_irq_posting_cap(void)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ if (!disable_irq_post) {
+ intel_irq_remap_ops.capability |= 1 << IRQ_POSTING_CAP;
+
+ for_each_iommu(iommu, drhd)
+ if (!cap_pi_support(iommu->cap)) {
+ intel_irq_remap_ops.capability &=
+ ~(1 << IRQ_POSTING_CAP);
+ break;
+ }
+ }
+}
+
static int __init intel_enable_irq_remapping(void)
{
struct dmar_drhd_unit *drhd;
@@ -702,12 +676,7 @@ static int __init intel_enable_irq_remapping(void)
irq_remapping_enabled = 1;
- /*
- * VT-d has a different layout for IO-APIC entries when
- * interrupt remapping is enabled. So it needs a special routine
- * to print IO-APIC entries for debugging purposes too.
- */
- x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
+ set_irq_posting_cap();
pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
@@ -909,6 +878,12 @@ static void disable_irq_remapping(void)
iommu_disable_irq_remapping(iommu);
}
+
+ /*
+ * Clear Posted-Interrupts capability.
+ */
+ if (!disable_irq_post)
+ intel_irq_remap_ops.capability &= ~(1 << IRQ_POSTING_CAP);
}
static int reenable_irq_remapping(int eim)
@@ -936,6 +911,8 @@ static int reenable_irq_remapping(int eim)
if (!setup)
goto error;
+ set_irq_posting_cap();
+
return 0;
error:
@@ -945,8 +922,7 @@ error:
return -1;
}
-static void prepare_irte(struct