summaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-05-17 10:33:30 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-05-17 10:33:30 -0700
commit0ef0fd351550130129bbdb77362488befd7b69d2 (patch)
tree23186172f5f85c06e18e3ee1a9619879df03c5df /arch/powerpc
parent4489da7183099f569a7d3dd819c975073c04bc72 (diff)
parentc011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "ARM: - support for SVE and Pointer Authentication in guests - PMU improvements POWER: - support for direct access to the POWER9 XIVE interrupt controller - memory and performance optimizations x86: - support for accessing memory not backed by struct page - fixes and refactoring Generic: - dirty page tracking improvements" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits) kvm: fix compilation on aarch64 Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU" kvm: x86: Fix L1TF mitigation for shadow MMU KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing" KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete tests: kvm: Add tests for KVM_SET_NESTED_STATE KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID tests: kvm: Add tests to .gitignore KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one KVM: Fix the bitmap range to copy during clear dirty KVM: arm64: Fix ptrauth ID register masking logic KVM: x86: use direct accessors for RIP and RSP KVM: VMX: Use accessors for GPRs outside of dedicated caching logic KVM: x86: Omit caching logic for always-available GPRs kvm, x86: Properly check whether a pfn is an MMIO or not ...
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/kvm_host.h11
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h41
-rw-r--r--arch/powerpc/include/asm/xive.h3
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h46
-rw-r--r--arch/powerpc/kvm/Makefile2
-rw-r--r--arch/powerpc/kvm/book3s.c42
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c96
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c105
-rw-r--r--arch/powerpc/kvm/book3s_hv.c152
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c57
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c144
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S86
-rw-r--r--arch/powerpc/kvm/book3s_xive.c250
-rw-r--r--arch/powerpc/kvm/book3s_xive.h37
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c1249
-rw-r--r--arch/powerpc/kvm/book3s_xive_template.c78
-rw-r--r--arch/powerpc/kvm/powerpc.c40
-rw-r--r--arch/powerpc/sysdev/xive/native.c11
18 files changed, 2176 insertions, 274 deletions
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6b5bb012ccb..013c76a0a03e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -201,6 +201,8 @@ struct kvmppc_spapr_tce_iommu_table {
struct kref kref;
};
+#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
+
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
@@ -210,6 +212,7 @@ struct kvmppc_spapr_tce_table {
u64 offset; /* in pages */
u64 size; /* window size in pages */
struct list_head iommu_tables;
+ struct mutex alloc_lock;
struct page *pages[0];
};
@@ -222,6 +225,7 @@ extern struct kvm_device_ops kvm_xics_ops;
struct kvmppc_xive;
struct kvmppc_xive_vcpu;
extern struct kvm_device_ops kvm_xive_ops;
+extern struct kvm_device_ops kvm_xive_native_ops;
struct kvmppc_passthru_irqmap;
@@ -312,7 +316,11 @@ struct kvm_arch {
#endif
#ifdef CONFIG_KVM_XICS
struct kvmppc_xics *xics;
- struct kvmppc_xive *xive;
+ struct kvmppc_xive *xive; /* Current XIVE device in use */
+ struct {
+ struct kvmppc_xive *native;
+ struct kvmppc_xive *xics_on_xive;
+ } xive_devices;
struct kvmppc_passthru_irqmap *pimap;
#endif
struct kvmppc_ops *kvm_ops;
@@ -449,6 +457,7 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_DEFAULT 0
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2 /* Includes a XIVE option */
+#define KVMPPC_IRQ_XIVE 3 /* XIVE native exploitation mode */
#define MMIO_HPTE_CACHE_SIZE 4
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index ac22b28ae78d..bc892380e6cd 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -197,10 +197,6 @@ extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
(iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
(stt)->size, (ioba), (npages)) ? \
H_PARAMETER : H_SUCCESS)
-extern long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap);
-extern void kvmppc_tce_put(struct kvmppc_spapr_tce_table *tt,
- unsigned long idx, unsigned long tce);
extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
@@ -273,6 +269,7 @@ union kvmppc_one_reg {
u64 addr;
u64 length;
} vpaval;
+ u64 xive_timaval[2];
};
struct kvmppc_ops {
@@ -480,6 +477,9 @@ extern void kvm_hv_vm_activated(void);
extern void kvm_hv_vm_deactivated(void);
extern bool kvm_hv_mode_active(void);
+extern void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
+ struct kvm_nested_guest *nested);
+
#else
static inline void __init kvm_cma_reserve(void)
{}
@@ -594,6 +594,22 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.irq_type == KVMPPC_IRQ_XIVE;
+}
+
+extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu);
+extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_native_init_module(void);
+extern void kvmppc_xive_native_exit_module(void);
+extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val);
+
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
@@ -617,6 +633,21 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
+
+static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
+ { return 0; }
+static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
+ struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
+static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_native_init_module(void) { }
+static inline void kvmppc_xive_native_exit_module(void) { }
+static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return 0; }
+static inline int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
+ union kvmppc_one_reg *val)
+{ return -ENOENT; }
+
#endif /* CONFIG_KVM_XIVE */
#if defined(CONFIG_PPC_POWERNV) && defined(CONFIG_KVM_BOOK3S_64_HANDLER)
@@ -665,6 +696,8 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index);
+long kvmppc_rm_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src);
long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
unsigned long slb_v, unsigned int status, bool data);
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index b579a943407b..eaf76f57023a 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -23,6 +23,7 @@
* same offset regardless of where the code is executing
*/
extern void __iomem *xive_tima;
+extern unsigned long xive_tima_os;
/*
* Offset in the TM area of our current execution level (provided by
@@ -73,6 +74,8 @@ struct xive_q {
u32 esc_irq;
atomic_t count;
atomic_t pending_count;
+ u64 guest_qaddr;
+ u32 guest_qshift;
};
/* Global enable flags for the XIVE support */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 26ca425f4c2c..b0f72dea8b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -482,6 +482,8 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+#define KVM_REG_PPC_VP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x8d)
+
/* Device control API: PPC-specific devices */
#define KVM_DEV_MPIC_GRP_MISC 1
#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
@@ -677,4 +679,48 @@ struct kvm_ppc_cpu_char {
#define KVM_XICS_PRESENTED (1ULL << 43)
#define KVM_XICS_QUEUED (1ULL << 44)
+/* POWER9 XIVE Native Interrupt Controller */
+#define KVM_DEV_XIVE_GRP_CTRL 1
+#define KVM_DEV_XIVE_RESET 1
+#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
+#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
+#define KVM_DEV_XIVE_GRP_SOURCE_SYNC 5 /* 64-bit source identifier */
+
+/* Layout of 64-bit XIVE source attribute values */
+#define KVM_XIVE_LEVEL_SENSITIVE (1ULL << 0)
+#define KVM_XIVE_LEVEL_ASSERTED (1ULL << 1)
+
+/* Layout of 64-bit XIVE source configuration attribute values */
+#define KVM_XIVE_SOURCE_PRIORITY_SHIFT 0
+#define KVM_XIVE_SOURCE_PRIORITY_MASK 0x7
+#define KVM_XIVE_SOURCE_SERVER_SHIFT 3
+#define KVM_XIVE_SOURCE_SERVER_MASK 0xfffffff8ULL
+#define KVM_XIVE_SOURCE_MASKED_SHIFT 32
+#define KVM_XIVE_SOURCE_MASKED_MASK 0x100000000ULL
+#define KVM_XIVE_SOURCE_EISN_SHIFT 33
+#define KVM_XIVE_SOURCE_EISN_MASK 0xfffffffe00000000ULL
+
+/* Layout of 64-bit EQ identifier */
+#define KVM_XIVE_EQ_PRIORITY_SHIFT 0
+#define KVM_XIVE_EQ_PRIORITY_MASK 0x7
+#define KVM_XIVE_EQ_SERVER_SHIFT 3
+#define KVM_XIVE_EQ_SERVER_MASK 0xfffffff8ULL
+
+/* Layout of EQ configuration values (64 bytes) */
+struct kvm_ppc_xive_eq {
+ __u32 flags;
+ __u32 qshift;
+ __u64 qaddr;
+ __u32 qtoggle;
+ __u32 qindex;
+ __u8 pad[40];
+};
+
+#define KVM_XIVE_EQ_ALWAYS_NOTIFY 0x00000001
+
+#define KVM_XIVE_TIMA_PAGE_OFFSET 0
+#define KVM_XIVE_ESB_PAGE_OFFSET 4
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 3223aec88b2c..4c67cc79de7c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -94,7 +94,7 @@ endif
kvm-book3s_64-objs-$(CONFIG_KVM_XICS) += \
book3s_xics.o
-kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o
+kvm-book3s_64-objs-$(CONFIG_KVM_XIVE) += book3s_xive.o book3s_xive_native.o
kvm-book3s_64-objs-$(CONFIG_SPAPR_TCE_IOMMU) += book3s_64_vio.o
kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 10c5579d20ce..61a212d0daf0 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -651,6 +651,18 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
*val = get_reg_val(id, kvmppc_xics_get_icp(vcpu));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_get_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
*val = get_reg_val(id, vcpu->arch.fscr);
break;
@@ -724,6 +736,18 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id,
r = kvmppc_xics_set_icp(vcpu, set_reg_val(id, *val));
break;
#endif /* CONFIG_KVM_XICS */
+#ifdef CONFIG_KVM_XIVE
+ case KVM_REG_PPC_VP_STATE:
+ if (!vcpu->arch.xive_vcpu) {
+ r = -ENXIO;
+ break;
+ }
+ if (xive_enabled())
+ r = kvmppc_xive_native_set_vp(vcpu, val);
+ else
+ r = -ENXIO;
+ break;
+#endif /* CONFIG_KVM_XIVE */
case KVM_REG_PPC_FSCR:
vcpu->arch.fscr = set_reg_val(id, *val);
break;
@@ -891,6 +915,17 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
kvmppc_rtas_tokens_free(kvm);
WARN_ON(!list_empty(&kvm->arch.spapr_tce_tables));
#endif
+
+#ifdef CONFIG_KVM_XICS
+ /*
+ * Free the XIVE devices which are not directly freed by the
+ * device 'release' method
+ */
+ kfree(kvm->arch.xive_devices.native);
+ kvm->arch.xive_devices.native = NULL;
+ kfree(kvm->arch.xive_devices.xics_on_xive);
+ kvm->arch.xive_devices.xics_on_xive = NULL;
+#endif /* CONFIG_KVM_XICS */
}
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
@@ -1050,6 +1085,9 @@ static int kvmppc_book3s_init(void)
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
@@ -1060,8 +1098,10 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
- if (xics_on_xive())
+ if (xics_on_xive()) {
kvmppc_xive_exit_module();
+ kvmppc_xive_native_exit_module();
+ }
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f100e331e69b..66270e07449a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -228,11 +228,33 @@ static void release_spapr_tce_table(struct rcu_head *head)
unsigned long i, npages = kvmppc_tce_pages(stt->size);
for (i = 0; i < npages; i++)
- __free_page(stt->pages[i]);
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
kfree(stt);
}
+static struct page *kvm_spapr_get_tce_page(struct kvmppc_spapr_tce_table *stt,
+ unsigned long sttpage)
+{
+ struct page *page = stt->pages[sttpage];
+
+ if (page)
+ return page;
+
+ mutex_lock(&stt->alloc_lock);
+ page = stt->pages[sttpage];
+ if (!page) {
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ WARN_ON_ONCE(!page);
+ if (page)
+ stt->pages[sttpage] = page;
+ }
+ mutex_unlock(&stt->alloc_lock);
+
+ return page;
+}
+
static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
{
struct kvmppc_spapr_tce_table *stt = vmf->vma->vm_file->private_data;
@@ -241,7 +263,10 @@ static vm_fault_t kvm_spapr_tce_fault(struct vm_fault *vmf)
if (vmf->pgoff >= kvmppc_tce_pages(stt->size))
return VM_FAULT_SIGBUS;
- page = stt->pages[vmf->pgoff];
+ page = kvm_spapr_get_tce_page(stt, vmf->pgoff);
+ if (!page)
+ return VM_FAULT_OOM;
+
get_page(page);
vmf->page = page;
return 0;
@@ -296,7 +321,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size = args->size;
int ret = -ENOMEM;
- int i;
if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
@@ -318,14 +342,9 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ mutex_init(&stt->alloc_lock);
INIT_LIST_HEAD_RCU(&stt->iommu_tables);
- for (i = 0; i < npages; i++) {
- stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
- if (!stt->pages[i])
- goto fail;
- }
-
mutex_lock(&kvm->lock);
/* Check this LIOBN hasn't been previously allocated */
@@ -352,17 +371,28 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (ret >= 0)
return ret;
- fail:
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
-
kfree(stt);
fail_acct:
kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
return ret;
}
+static long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ return 0;
+}
+
static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
unsigned long tce)
{
@@ -378,7 +408,7 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_TOO_HARD;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(stt->kvm, tce, &ua))
return H_TOO_HARD;
list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
@@ -397,6 +427,36 @@ static long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
+/*
+ * Handles TCE requests for emulated devices.
+ * Puts guest TCE values to the table and expects user space to convert them.
+ * Cannot fail so kvmppc_tce_validate must be called before it.
+ */
+static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+ unsigned long idx, unsigned long tce)
+{
+ struct page *page;
+ u64 *tbl;
+ unsigned long sttpage;
+
+ idx -= stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ page = stt->pages[sttpage];
+
+ if (!page) {
+ /* We allow any TCE, not just with read|write permissions */
+ if (!tce)
+ return;
+
+ page = kvm_spapr_get_tce_page(stt, sttpage);
+ if (!page)
+ return;
+ }
+ tbl = page_to_virt(page);
+
+ tbl[idx % TCES_PER_PAGE] = tce;
+}
+
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry)
{
@@ -551,7 +611,7 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
+ if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
ret = H_PARAMETER;
goto unlock_exit;
}
@@ -612,7 +672,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
return ret;
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) {
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua)) {
ret = H_TOO_HARD;
goto unlock_exit;
}
@@ -647,7 +707,7 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
tce = be64_to_cpu(tce);
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index 2206bc729b9a..484b47fa3960 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -66,8 +66,6 @@
#endif
-#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
-
/*
* Finds a TCE table descriptor by LIOBN.
*
@@ -88,6 +86,25 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
EXPORT_SYMBOL_GPL(kvmppc_find_table);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce,
+ unsigned long *ua, unsigned long **prmap)
+{
+ unsigned long gfn = tce >> PAGE_SHIFT;
+ struct kvm_memory_slot *memslot;
+
+ memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+ if (!memslot)
+ return -EINVAL;
+
+ *ua = __gfn_to_hva_memslot(memslot, gfn) |
+ (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+
+ if (prmap)
+ *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
+
+ return 0;
+}
+
/*
* Validates TCE address.
* At the moment flags and page mask are validated.
@@ -111,7 +128,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
- if (kvmppc_tce_to_ua(stt->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL))
return H_TOO_HARD;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -129,7 +146,6 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt,
return H_SUCCESS;
}
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
/* Note on the use of page_address() in real mode,
*
@@ -161,13 +177,9 @@ static u64 *kvmppc_page_address(struct page *page)
/*
* Handles TCE requests for emulated devices.
* Puts guest TCE values to the table and expects user space to convert them.
- * Called in both real and virtual modes.
- * Cannot fail so kvmppc_tce_validate must be called before it.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
+ * Cannot fail so kvmppc_rm_tce_validate must be called before it.
*/
-void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
+static void kvmppc_rm_tce_put(struct kvmppc_spapr_tce_table *stt,
unsigned long idx, unsigned long tce)
{
struct page *page;
@@ -175,35 +187,48 @@ void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
idx -= stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ /*
+ * page must not be NULL in real mode,
+ * kvmppc_rm_ioba_validate() must have taken care of this.
+ */
+ WARN_ON_ONCE_RM(!page);
tbl = kvmppc_page_address(page);
tbl[idx % TCES_PER_PAGE] = tce;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_put);
-long kvmppc_tce_to_ua(struct kvm *kvm, unsigned long tce,
- unsigned long *ua, unsigned long **prmap)
+/*
+ * TCEs pages are allocated in kvmppc_rm_tce_put() which won't be able to do so
+ * in real mode.
+ * Check if kvmppc_rm_tce_put() can succeed in real mode, i.e. a TCEs page is
+ * allocated or not required (when clearing a tce entry).
+ */
+static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt,
+ unsigned long ioba, unsigned long npages, bool clearing)
{
- unsigned long gfn = tce >> PAGE_SHIFT;
- struct kvm_memory_slot *memslot;
+ unsigned long i, idx, sttpage, sttpages;
+ unsigned long ret = kvmppc_ioba_validate(stt, ioba, npages);
- memslot = search_memslots(kvm_memslots(kvm), gfn);
- if (!memslot)
- return -EINVAL;
-
- *ua = __gfn_to_hva_memslot(memslot, gfn) |
- (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE));
+ if (ret)
+ return ret;
+ /*
+ * clearing==true says kvmppc_rm_tce_put won't be allocating pages
+ * for empty tces.
+ */
+ if (clearing)
+ return H_SUCCESS;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (prmap)
- *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
-#endif
+ idx = (ioba >> stt->page_shift) - stt->offset;
+ sttpage = idx / TCES_PER_PAGE;
+ sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) /
+ TCES_PER_PAGE;
+ for (i = sttpage; i < sttpage + sttpages; ++i)
+ if (!stt->pages[i])
+ return H_TOO_HARD;
- return 0;
+ return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_tce_to_ua);
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
static long iommu_tce_xchg_rm(struct mm_struct *mm, struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
@@ -381,7 +406,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, 1);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, 1, tce == 0);
if (ret != H_SUCCESS)
return ret;
@@ -390,7 +415,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
return ret;
dir = iommu_tce_direction(tce);
- if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
entry = ioba >> stt->page_shift;
@@ -409,7 +434,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
}
}
- kvmppc_tce_put(stt, entry, tce);
+ kvmppc_rm_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -480,7 +505,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (tce_list & (SZ_4K - 1))
return H_PARAMETER;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, false);
if (ret != H_SUCCESS)
return ret;
@@ -492,7 +517,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
*/
struct mm_iommu_table_group_mem_t *mem;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL))
return H_TOO_HARD;
mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
@@ -508,7 +533,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
* We do not require memory to be preregistered in this case
* so lock rmap and do __find_linux_pte_or_hugepte().
*/
- if (kvmppc_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
return H_TOO_HARD;
rmap = (void *) vmalloc_to_phys(rmap);
@@ -542,7 +567,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long tce = be64_to_cpu(((u64 *)tces)[i]);
ua = 0;
- if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
+ if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL))
return H_PARAMETER;
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -557,7 +582,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
}
}
- kvmppc_tce_put(stt, entry + i, tce);
+ kvmppc_rm_tce_put(stt, entry + i, tce);
}
unlock_exit:
@@ -583,7 +608,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (!stt)
return H_TOO_HARD;
- ret = kvmppc_ioba_validate(stt, ioba, npages);
+ ret = kvmppc_rm_ioba_validate(stt, ioba, npages, tce_value == 0);
if (ret != H_SUCCESS)
return ret;
@@ -610,7 +635,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
}
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
+ kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
return H_SUCCESS;
}
@@ -635,6 +660,10 @@ long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
idx = (ioba >> stt->page_shift) - stt->offset;
page = stt->pages[idx / TCES_PER_PAGE];
+ if (!page) {
+ vcpu->arch.regs.gpr[4] = 0;
+ return H_SUCCESS;
+ }
tbl = (u64 *)page_address(page);
vcpu->arch.regs.gpr[4] = tbl[idx % TCES_PER_PAGE];
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7bdcd4d7a9f0..d5fc624e0655 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -750,7 +750,7 @@ static bool kvmppc_doorbell_pending(struct kvm_vcpu *vcpu)
/*
* Ensure that the read of vcore->dpdes comes after the read
* of vcpu->doorbell_request. This barrier matches the
- * smb_wmb() in kvmppc_guest_entry_inject().
+ * smp_wmb() in kvmppc_guest_entry_inject().
*/
smp_rmb();
vc = vcpu->arch.vcore;
@@ -802,6 +802,80 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
}
}
+/* Copy guest memory in place - must reside within a single memslot */
+static int kvmppc_copy_guest(struct kvm *kvm, gpa_t to, gpa_t from,
+ unsigned long len)
+{
+ struct kvm_memory_slot *to_memslot = NULL;
+ struct kvm_memory_slot *from_memslot = NULL;
+ unsigned long to_addr, from_addr;
+ int r;
+
+ /* Get HPA for from address */
+ from_memslot = gfn_to_memslot(kvm, from >> PAGE_SHIFT);
+ if (!from_memslot)
+ return -EFAULT;
+ if ((from + len) >= ((from_memslot->base_gfn + from_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ from_addr = gfn_to_hva_memslot(from_memslot, from >> PAGE_SHIFT);
+ if (kvm_is_error_hva(from_addr))
+ return -EFAULT;
+ from_addr |= (from & (PAGE_SIZE - 1));
+
+ /* Get HPA for to address */
+ to_memslot = gfn_to_memslot(kvm, to >> PAGE_SHIFT);
+ if (!to_memslot)
+ return -EFAULT;
+ if ((to + len) >= ((to_memslot->base_gfn + to_memslot->npages)
+ << PAGE_SHIFT))
+ return -EINVAL;
+ to_addr = gfn_to_hva_memslot(to_memslot, to >> PAGE_SHIFT);
+ if (kvm_is_error_hva(to_addr))
+ return -EFAULT;
+ to_addr |= (to & (PAGE_SIZE - 1));
+
+ /* Perform copy */
+ r = raw_copy_in_user((void __user *)to_addr, (void __user *)from_addr,
+ len);
+ if (r)
+ return -EFAULT;
+ mark_page_dirty(kvm, to >> PAGE_SHIFT);
+ return 0;
+}
+
+static long kvmppc_h_page_init(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long dest, unsigned long src)
+{
+ u64 pg_sz = SZ_4K; /* 4K page size */
+ u64 pg_mask = SZ_4K - 1;
+ int ret;
+
+ /* Check for invalid flags (H_PAGE_SET_LOANED covers all CMO flags) */
+ if (flags & ~(H_ICACHE_INVALIDATE | H_ICACHE_SYNCHRONIZE |
+ H_ZERO_PAGE | H_COPY_PAGE | H_PAGE_SET_LOANED))
+ return H_PARAMETER;
+
+ /* dest (and src if copy_page flag set) must be page aligned */
+ if ((dest & pg_mask) || ((flags & H_COPY_PAGE) && (src & pg_mask)))
+ return H_PARAMETER;
+
+ /* zero and/or copy the page as determined by the flags */
+ if (flags & H_COPY_PAGE) {
+ ret = kvmppc_copy_guest(vcpu->kvm, dest, src, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ } else if (flags & H_ZERO_PAGE) {
+ ret = kvm_clear_guest(vcpu->kvm, dest, pg_sz);
+ if (ret < 0)
+ return H_PARAMETER;
+ }
+
+ /* We can ignore the remaining flags */
+
+ return H_SUCCESS;
+}
+
static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
{
struct kvmppc_vcore *vcore = target->arch.vcore;
@@ -1004,6 +1078,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
if (nesting_enabled(vcpu->kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
+ case H_PAGE_INIT:
+ ret = kvmppc_h_page_init(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ break;
default:
return RESUME_HOST;
}
@@ -1048,6 +1127,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd)
case H_IPOLL:
case H_XIRR_X:
#endif
+ case H_PAGE_INIT:
return 1;
}
@@ -2505,37 +2585,6 @@ static void kvmppc_prepare_radix_vcpu(struct kvm_vcpu *vcpu, int pcpu)
}
}
-static void kvmppc_radix_check_need_tlb_flush(struct kvm *kvm, int pcpu,
- struct kvm_nested_guest *nested)
-{
- cpumask_t *need_tlb_flush;
- int lpid;
-
- if (!cpu_has_feature(CPU_FTR_HVMODE))
- return;
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pcpu &= ~0x3UL;
-
- if (nested) {
- lpid = nested->shadow_lpid;
- need_tlb_flush = &nested->need_tlb_flush;
- } else {
- lpid = kvm->arch.lpid;
- need_tlb_flush = &kvm->arch.need_tlb_flush;
- }
-
- mtspr(SPRN_LPID, lpid);
- isync();
- smp_mb();
-
- if (cpumask_test_cpu(pcpu, need_tlb_flush)) {
- radix__local_flush_tlb_lpid_guest(lpid);
- /* Clear the bit after the TLB flush */
- cpumask_clear_cpu(pcpu, need_tlb_flush);
- }
-}