author	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-08 12:37:56 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2017-05-08 12:37:56 -0700
commit	2d3e4866dea96b0506395b47bfefb234f2088dac (patch)
tree	d5c7bd97d222bef46f9d73adee8c79dbdb9f82f4 /arch/powerpc/kvm
parent	9c6ee01ed5bb1ee489d580eaa60d7eb5a8ede336 (diff)
parent	2e5b0bd9cc6172edef502dfae28ae790f74a882e (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - HYP mode stub supports kexec/kdump on 32-bit
   - improved PMU support
   - virtual interrupt controller performance improvements
   - support for userspace virtual interrupt controller (slower, but
     necessary for KVM on the weird Broadcom SoCs used by the
     Raspberry Pi 3)

  MIPS:
   - basic support for hardware virtualization (ImgTec P5600/P6600/I6400
     and Cavium Octeon III)

  PPC:
   - in-kernel acceleration for VFIO

  s390:
   - support for guests without storage keys
   - adapter interruption suppression

  x86:
   - usual range of nVMX improvements, notably nested EPT support for
     accessed and dirty bits
   - emulation of CPL3 CPUID faulting

  generic:
   - first part of VCPU thread request API
   - kvm_stat improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (227 commits)
  kvm: nVMX: Don't validate disabled secondary controls
  KVM: put back #ifndef CONFIG_S390 around kvm_vcpu_kick
  Revert "KVM: Support vCPU-based gfn->hva cache"
  tools/kvm: fix top level makefile
  KVM: x86: don't hold kvm->lock in KVM_SET_GSI_ROUTING
  KVM: Documentation: remove VM mmap documentation
  kvm: nVMX: Remove superfluous VMX instruction fault checks
  KVM: x86: fix emulation of RSM and IRET instructions
  KVM: mark requests that need synchronization
  KVM: return if kvm_vcpu_wake_up() did wake up the VCPU
  KVM: add explicit barrier to kvm_vcpu_kick
  KVM: perform a wake_up in kvm_make_all_cpus_request
  KVM: mark requests that do not need a wakeup
  KVM: remove #ifndef CONFIG_S390 around kvm_vcpu_wake_up
  KVM: x86: always use kvm_make_request instead of set_bit
  KVM: add kvm_{test,clear}_request to replace {test,clear}_bit
  s390: kvm: Cpu model support for msa6, msa7 and msa8
  KVM: x86: remove irq disablement around KVM_SET_CLOCK/KVM_GET_CLOCK
  kvm: better MWAIT emulation for guests
  KVM: x86: virtualize cpuid faulting
  ...
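The PPC item above ("in-kernel acceleration for VFIO") is what the arch/powerpc/kvm diff below implements: kvm_spapr_tce_attach_iommu_group() plus virtual-mode and real-mode TCE update paths. The following is a minimal, illustrative userspace sketch (not part of this patch) of how a VMM would be expected to wire a VFIO group to an in-kernel TCE table through the KVM "vfio" pseudo-device; the LIOBN, page shift and window size are made-up example values, and all error handling beyond the basic checks, as well as the VFIO container setup itself, is omitted.

/*
 * Sketch only: assumes an existing VM fd and an already-opened VFIO
 * group fd. LIOBN and window geometry are placeholder example values.
 */
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int kvm_vfio_attach_tce(int vm_fd, int group_fd)
{
	struct kvm_create_device cd = { .type = KVM_DEV_TYPE_VFIO };
	struct kvm_create_spapr_tce_64 tce_args = {
		.liobn = 0x80000000,		/* example LIOBN */
		.page_shift = 12,		/* 4K IOMMU pages */
		.offset = 0,
		.size = 1ULL << 17,		/* 128K TCEs == 512MB DMA window */
	};
	struct kvm_vfio_spapr_tce spapr = { 0 };
	struct kvm_device_attr attr = { 0 };
	int table_fd;

	/* Create the KVM "vfio" pseudo-device for this VM */
	if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
		return -1;

	/* Tell KVM about the VFIO group */
	attr.group = KVM_DEV_VFIO_GROUP;
	attr.attr = KVM_DEV_VFIO_GROUP_ADD;
	attr.addr = (__u64)(unsigned long)&group_fd;
	if (ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr) < 0)
		return -1;

	/* Create the guest-visible TCE table; KVM returns a table fd */
	table_fd = ioctl(vm_fd, KVM_CREATE_SPAPR_TCE_64, &tce_args);
	if (table_fd < 0)
		return -1;

	/* Link the group's hardware IOMMU table to the KVM TCE table */
	spapr.groupfd = group_fd;
	spapr.tablefd = table_fd;
	attr.attr = KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE;
	attr.addr = (__u64)(unsigned long)&spapr;
	return ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
}

Once that attachment succeeds, guest H_PUT_TCE, H_PUT_TCE_INDIRECT and H_STUFF_TCE hcalls are applied to the hardware IOMMU table inside kvmppc_h_put_tce() and its real-mode counterparts in the diff below, instead of exiting to userspace for every TCE update.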
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r--  arch/powerpc/kvm/Kconfig               |   1
-rw-r--r--  arch/powerpc/kvm/book3s.c              |  18
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu.c       |   1
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_host.c  |   7
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c       | 315
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c    | 303
-rw-r--r--  arch/powerpc/kvm/book3s_emulate.c      |  34
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c           |   8
-rw-r--r--  arch/powerpc/kvm/book3s_pr.c           |  14
-rw-r--r--  arch/powerpc/kvm/book3s_pr_papr.c      |   2
-rw-r--r--  arch/powerpc/kvm/booke.c               |   9
-rw-r--r--  arch/powerpc/kvm/e500_mmu_host.c       |   5
-rw-r--r--  arch/powerpc/kvm/emulate.c             |   8
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c   | 472
-rw-r--r--  arch/powerpc/kvm/powerpc.c             | 327
15 files changed, 1392 insertions, 132 deletions
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 029be26b5a17..65a471de96de 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,6 +67,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index aedacefd961d..8c4d7e9d27d2 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -197,6 +197,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0);
+}
+
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0);
+}
+
+void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0);
+}
+
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 70153578131a..29ebe2fd5867 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -319,6 +319,7 @@ do_second:
gpte->may_execute = true;
gpte->may_read = false;
gpte->may_write = false;
+ gpte->wimg = r & HPTE_R_WIMG;
switch (pp) {
case 0:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 74b0153780e3..9a4614cd0e53 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
else
kvmppc_mmu_flush_icache(pfn);
+ rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
/*
* Use 64K pages if possible; otherwise, on 64K page kernels,
* we need to transfer 4 more bits from guest real to host real addr.
@@ -177,12 +179,15 @@ map_again:
ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
hpsize, hpsize, MMU_SEGSIZE_256M);
- if (ret < 0) {
+ if (ret == -1) {
/* If we couldn't map a primary PTE, try a secondary */
hash = ~hash;
vflags ^= HPTE_V_SECONDARY;
attempt++;
goto map_again;
+ } else if (ret < 0) {
+ r = -EIO;
+ goto out_unlock;
} else {
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 3e26cd4979f9..a160c14304eb 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -28,6 +28,8 @@
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
+#include <linux/iommu.h>
+#include <linux/file.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -40,6 +42,7 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
@@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
return ret;
}
+static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
+ struct kvmppc_spapr_tce_iommu_table, rcu);
+
+ iommu_tce_table_put(stit->tbl);
+
+ kfree(stit);
+}
+
+static void kvm_spapr_tce_liobn_put(struct kref *kref)
+{
+ struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
+ struct kvmppc_spapr_tce_iommu_table, kref);
+
+ list_del_rcu(&stit->next);
+
+ call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
+}
+
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+ struct iommu_group *grp)
+{
+ int i;
+ struct kvmppc_spapr_tce_table *stt;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+ struct iommu_table_group *table_group = NULL;
+
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ continue;
+
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] != stit->tbl)
+ continue;
+
+ kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
+ return;
+ }
+ }
+ }
+}
+
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ struct iommu_group *grp)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ bool found = false;
+ struct iommu_table *tbl = NULL;
+ struct iommu_table_group *table_group;
+ long i;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ struct fd f;
+
+ f = fdget(tablefd);
+ if (!f.file)
+ return -EBADF;
+
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt == f.file->private_data) {
+ found = true;
+ break;
+ }
+ }
+
+ fdput(f);
+
+ if (!found)
+ return -EINVAL;
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ return -EFAULT;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbltmp = table_group->tables[i];
+
+ if (!tbltmp)
+ continue;
+ /*
+ * Make sure hardware table parameters are exactly the same;
+ * this is used in the TCE handlers where boundary checks
+ * use only the first attached table.
+ */
+ if ((tbltmp->it_page_shift == stt->page_shift) &&
+ (tbltmp->it_offset == stt->offset) &&
+ (tbltmp->it_size == stt->size)) {
+ /*
+ * Reference the table to avoid races with
+ * add/remove DMA windows.
+ */
+ tbl = iommu_tce_table_get(tbltmp);
+ break;
+ }
+ }
+ if (!tbl)
+ return -EINVAL;
+
+ list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+ if (tbl != stit->tbl)
+ continue;
+
+ if (!kref_get_unless_zero(&stit->kref)) {
+ /* stit is being destroyed */
+ iommu_tce_table_put(tbl);
+ return -ENOTTY;
+ }
+ /*
+ * The table is already known to this KVM, we just increased
+ * its KVM reference counter and can return.
+ */
+ return 0;
+ }
+
+ stit = kzalloc(sizeof(*stit), GFP_KERNEL);
+ if (!stit) {
+ iommu_tce_table_put(tbl);
+ return -ENOMEM;
+ }
+
+ stit->tbl = tbl;
+ kref_init(&stit->kref);
+
+ list_add_rcu(&stit->next, &stt->iommu_tables);
+
+ return 0;
+}
+
static void release_spapr_tce_table(struct rcu_head *head)
{
struct kvmppc_spapr_tce_table *stt = container_of(head,
@@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
list_del_rcu(&stt->list);
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ WARN_ON(!kref_read(&stit->kref));
+ while (1) {
+ if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
+ break;
+ }
+ }
+
kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit(
@@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EBUSY;
}
- size = args->size;
+ size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) {
@@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ INIT_LIST_HEAD_RCU(&stt->iommu_tables);
for (i = 0; i < npages; i++) {
stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -211,15 +355,106 @@ fail:
return ret;
}
+static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+ iommu_tce_xchg(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+ *pua = 0;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ enum dma_data_direction dir = DMA_NONE;
+ unsigned long hpa = 0;
+ long ret;
+
+ if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+ return H_HARDWARE;
+
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret != H_SUCCESS)
+ iommu_tce_xchg(tbl, entry, &hpa, &dir);
+
+ return ret;
+}
+
+long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+{
+ long ret;
+ unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+ if (!mem)
+ /* This only handles v2 IOMMU type, v1 is handled via ioctl() */
+ return H_TOO_HARD;
+
+ if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+ return H_HARDWARE;
+
+ if (mm_iommu_mapped_inc(mem))
+ return H_CLOSED;
+
+ ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ if (WARN_ON_ONCE(ret)) {
+ mm_iommu_mapped_dec(mem);
+ return H_HARDWARE;
+ }
+
+ if (dir != DMA_NONE)
+ kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+ *pua = ua;
+
+ return 0;
+}
+
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
- long ret;
+ struct kvmppc_spapr_tce_table *stt;
+ long ret, idx;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long entry, ua = 0;
+ enum dma_data_direction dir;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+ dir = iommu_tce_direction(tce);
+ if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ return H_PARAMETER;
+
+ entry = ioba >> stt->page_shift;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry);
+ } else {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
+ entry, ua, dir);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long entry, ua = 0;
u64 __user *tces;
u64 tce;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
goto unlock_exit;
+ if (kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+ &ua, NULL))
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ ret = kvmppc_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ goto unlock_exit;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+
kvmppc_tce_put(stt, entry + i, tce);
}
@@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+ }
+
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e4c4ea973e57..eda0a8f6fae8 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -40,6 +40,31 @@
#include <asm/iommu.h>
#include <asm/tce.h>
+#ifdef CONFIG_BUG
+
+#define WARN_ON_ONCE_RM(condition) ({ \
+ static bool __section(.data.unlikely) __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ __warned = true; \
+ pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \
+ __stringify(condition), \
+ __func__, __LINE__); \
+ dump_stack(); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
+#else
+
+#define WARN_ON_ONCE_RM(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+
+#endif
+
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
/*
@@ -48,10 +73,9 @@
* WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
-struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
unsigned long liobn)
{
- struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
@@ -63,27 +87,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
EXPORT_SYMBOL_GPL(kvmppc_find_table);
/*
- * Validates IO address.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
- */
-long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
- unsigned long ioba, unsigned long npages)
-{
- unsigned long mask = (1ULL << stt->page_shift) - 1;
- unsigned long idx = ioba >> stt->page_shift;
-
- if ((ioba & mask) || (idx < stt->offset) ||
- (idx - stt->offset + npages > stt->size) ||
- (idx + npages < idx))
- return H_PARAMETER;
-
- return H_SUCCESS;
-}
-EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
-
-/*
* Validates TCE address.
* At the moment flags and page mask are validated.
* As the host kernel does not access those addresses (just puts them
@@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
*/
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
{
- unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
- unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+ unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ enum dma_data_direction dir = iommu_tce_direction(tce);
+
+ /* Allow userspace to poison TCE table */
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
- if (tce & mask)
+ if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
return H_SUCCESS;
@@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+ iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ pua = (void *) vmalloc_to_phys(pua);
+ if (WARN_ON_ONCE_RM(!pua))
+ return H_HARDWARE;
+
+ mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+ *pua = 0;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ enum dma_data_direction dir = DMA_NONE;
+ unsigned long hpa = 0;
+ long ret;
+
+ if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+ */
+ return H_TOO_HARD;
+
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret)
+ iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+
+ return ret;
+}
+
+static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+{
+ long ret;
+ unsigned long hpa = 0;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+ if (!mem)
+ return H_TOO_HARD;
+
+ if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+ return H_HARDWARE;
+
+ pua = (void *) vmalloc_to_phys(pua);
+ if (WARN_ON_ONCE_RM(!pua))
+ return H_HARDWARE;
+
+ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+ return H_CLOSED;
+
+ ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+ if (ret) {
+ mm_iommu_mapped_dec(mem);
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+ */
+ return H_TOO_HARD;
+ }
+
+ if (dir != DMA_NONE)
+ kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+ *pua = ua;
+
+ return 0;
+}
+
long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ struct kvmppc_spapr_tce_table *stt;
long ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long entry, ua = 0;
+ enum dma_data_direction dir;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+ dir = iommu_tce_direction(tce);
+ if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ return H_PARAMETER;
+
+ entry = ioba >> stt->page_shift;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE)
+ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry);
+ else
+ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry, ua, dir);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
unsigned long *rmap = NULL;
+ bool prereg = false;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
return ret;
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
- return H_TOO_HARD;
+ if (mm_iommu_preregistered(vcpu->kvm->mm)) {
+ /*
+ * We get here if guest memory was pre-registered which
+ * is normally VFIO case and gpa->hpa translation does not
+ * depend on hpt.
+ */
+ struct mm_iommu_table_group_mem_t *mem;
- rmap = (void *) vmalloc_to_phys(rmap);
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ return H_TOO_HARD;
- /*
- * Synchronize with the MMU notifier callbacks in
- * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
- * While we have the rmap lock, code running on other CPUs
- * cannot finish unmapping the host real page that backs
- * this guest real page, so we are OK to access the host
- * real page.
- */
- lock_rmap(rmap);
- if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
- ret = H_TOO_HARD;
- goto unlock_exit;
+ mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
+ if (mem)
+ prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+ }
+
+ if (!prereg) {
+ /*
+ * This is usually a case of a guest with emulated devices only
+ * when TCE list is not in preregistered memory.
+ * We do not require memory to be preregistered in this case
+ * so lock rmap and do __find_linux_pte_or_hugepte().
+ */
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ return H_TOO_HARD;
+
+ rmap = (void *) vmalloc_to_phys(rmap);
+ if (WARN_ON_ONCE_RM(!rmap))
+ return H_HARDWARE;
+
+ /*
+ * Synchronize with the MMU notifier callbacks in
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * While we have the rmap lock, code running on other CPUs
+ * cannot finish unmapping the host real page that backs
+ * this guest real page, so we are OK to access the host
+ * real page.
+ */
+ lock_rmap(rmap);
+ if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
}
for (i = 0; i < npages; ++i) {
@@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
goto unlock_exit;
+ ua = 0;
+ if (kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+ &ua, NULL))
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ goto unlock_exit;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
- unlock_rmap(rmap);
+ if (rmap)
+ unlock_rmap(rmap);
return ret;
}
@@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+ }
+
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
@@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ struct kvmppc_spapr_tce_table *stt;
long ret;
unsigned long idx;
struct page *page;
u64 *tbl;
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 8359752b3efc..68d68983948e 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
break;
unprivileged:
default:
- printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
break;
}
@@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
break;
default:
unprivileged:
- printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
+ sprn == 4 || sprn == 5 || sprn == 6) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
+
break;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index fadb75abfe37..549dd6070dee 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3624,11 +3624,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
return -EIO;
mutex_lock(&kvm->lock);
+ if (!kvm->arch.pimap)
+ goto unlock;
- if (kvm->arch.pimap == NULL) {
- mutex_unlock(&kvm->lock);
- return 0;
- }
pimap = kvm->arch.pimap;
for (i = 0; i < pimap->n_mapped; i++) {
@@ -3650,7 +3648,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
* We don't free this structure even when the count goes to
* zero. The structure is freed when we destroy the VM.