author    Linus Torvalds <torvalds@linux-foundation.org>  2018-08-19 10:38:36 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-08-19 10:38:36 -0700
commit    e61cf2e3a5b452cfefcb145021f5a8ea88735cc1 (patch)
tree      bbabaf0d4753d6880ecbaddd8daa0164d49c1c61 /arch
parent    1009aa1205c2c5e9101437dcadfa195708d863bf (diff)
parent    28a1f3ac1d0c8558ee4453d9634dad891a6e922e (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull first set of KVM updates from Paolo Bonzini:

 "PPC:
   - minor code cleanups

  x86:
   - PCID emulation and CR3 caching for shadow page tables
   - nested VMX live migration
   - nested VMCS shadowing
   - optimized IPI hypercall
   - some optimizations

  ARM will come next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (85 commits)
  kvm: x86: Set highest physical address bits in non-present/reserved SPTEs
  KVM/x86: Use CC_SET()/CC_OUT in arch/x86/kvm/vmx.c
  KVM: X86: Implement PV IPIs in linux guest
  KVM: X86: Add kvm hypervisor init time platform setup callback
  KVM: X86: Implement "send IPI" hypercall
  KVM/x86: Move X86_CR4_OSXSAVE check into kvm_valid_sregs()
  KVM: x86: Skip pae_root shadow allocation if tdp enabled
  KVM/MMU: Combine flushing remote tlb in mmu_set_spte()
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_BASE when possible
  KVM: vmx: skip VMWRITE of HOST_{FS,GS}_SEL when possible
  KVM: vmx: always initialize HOST_{FS,GS}_BASE to zero during setup
  KVM: vmx: move struct host_state usage to struct loaded_vmcs
  KVM: vmx: compute need to reload FS/GS/LDT on demand
  KVM: nVMX: remove a misleading comment regarding vmcs02 fields
  KVM: vmx: rename __vmx_load_host_state() and vmx_save_host_state()
  KVM: vmx: add dedicated utility to access guest's kernel_gs_base
  KVM: vmx: track host_state.loaded using a loaded_vmcs pointer
  KVM: vmx: refactor segmentation code in vmx_save_host_state()
  kvm: nVMX: Fix fault priority for VMX operations
  kvm: nVMX: Fix fault vector for VMX operation at CPL > 0
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/powerpc/include/asm/kvm_book3s.h | 47
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 26
-rw-r--r--  arch/powerpc/include/asm/reg.h | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio.c | 5
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 42
-rw-r--r--  arch/powerpc/kvm/book3s_xive.c | 19
-rw-r--r--  arch/powerpc/kvm/emulate_loadstore.c | 7
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 30
-rw-r--r--  arch/s390/include/asm/kvm_host.h | 11
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h | 5
-rw-r--r--  arch/s390/kvm/kvm-s390.c | 301
-rw-r--r--  arch/s390/kvm/priv.c | 40
-rw-r--r--  arch/s390/kvm/vsie.c | 11
-rw-r--r--  arch/s390/tools/gen_facilities.c | 3
-rw-r--r--  arch/x86/hyperv/Makefile | 2
-rw-r--r--  arch/x86/hyperv/nested.c | 56
-rw-r--r--  arch/x86/include/asm/hyperv-tlfs.h | 8
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 56
-rw-r--r--  arch/x86/include/asm/mshyperv.h | 2
-rw-r--r--  arch/x86/include/asm/trace/hyperv.h | 14
-rw-r--r--  arch/x86/include/uapi/asm/kvm.h | 37
-rw-r--r--  arch/x86/include/uapi/asm/kvm_para.h | 1
-rw-r--r--  arch/x86/kernel/kvm.c | 112
-rw-r--r--  arch/x86/kvm/cpuid.c | 3
-rw-r--r--  arch/x86/kvm/emulate.c | 2
-rw-r--r--  arch/x86/kvm/hyperv.c | 27
-rw-r--r--  arch/x86/kvm/hyperv.h | 2
-rw-r--r--  arch/x86/kvm/lapic.c | 40
-rw-r--r--  arch/x86/kvm/mmu.c | 531
-rw-r--r--  arch/x86/kvm/mmu.h | 24
-rw-r--r--  arch/x86/kvm/paging_tmpl.h | 28
-rw-r--r--  arch/x86/kvm/svm.c | 12
-rw-r--r--  arch/x86/kvm/vmx.c | 1120
-rw-r--r--  arch/x86/kvm/x86.c | 108
34 files changed, 2185 insertions, 549 deletions
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 1f345a0b6ba2..83a9aa3cf689 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -390,4 +390,51 @@ extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
#define SPLIT_HACK_MASK 0xff000000
#define SPLIT_HACK_OFFS 0xfb000000
+/*
+ * This packs a VCPU ID from the [0..KVM_MAX_VCPU_ID) space down to the
+ * [0..KVM_MAX_VCPUS) space, using knowledge of the guest's core stride
+ * (but not its actual threading mode, which is not available) to avoid
+ * collisions.
+ *
+ * The implementation leaves VCPU IDs from the range [0..KVM_MAX_VCPUS) (block
+ * 0) unchanged: if the guest is filling each VCORE completely then it will be
+ * using consecutive IDs and it will fill the space without any packing.
+ *
+ * For higher VCPU IDs, the packed ID is based on the VCPU ID modulo
+ * KVM_MAX_VCPUS (effectively masking off the top bits) and then an offset is
+ * added to avoid collisions.
+ *
+ * VCPU IDs in the range [KVM_MAX_VCPUS..(KVM_MAX_VCPUS*2)) (block 1) are only
+ * possible if the guest is leaving at least 1/2 of each VCORE empty, so IDs
+ * can be safely packed into the second half of each VCORE by adding an offset
+ * of (stride / 2).
+ *
+ * Similarly, if VCPU IDs in the range [(KVM_MAX_VCPUS*2)..(KVM_MAX_VCPUS*4))
+ * (blocks 2 and 3) are seen, the guest must be leaving at least 3/4 of each
+ * VCORE empty so packed IDs can be offset by (stride / 4) and (stride * 3 / 4).
+ *
+ * Finally, VCPU IDs from blocks 5..7 will only be seen if the guest is using a
+ * stride of 8 and 1 thread per core so the remaining offsets of 1, 5, 3 and 7
+ * must be free to use.
+ *
+ * (The offsets for each block are stored in block_offsets[], indexed by the
+ * block number if the stride is 8. For cases where the guest's stride is less
+ * than 8, we can re-use the block_offsets array by multiplying the block
+ * number by (MAX_SMT_THREADS / stride) to reach the correct entry.)
+ */
+static inline u32 kvmppc_pack_vcpu_id(struct kvm *kvm, u32 id)
+{
+ const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
+ int stride = kvm->arch.emul_smt_mode;
+ int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);
+ u32 packed_id;
+
+ if (WARN_ONCE(block >= MAX_SMT_THREADS, "VCPU ID too large to pack"))
+ return 0;
+ packed_id = (id % KVM_MAX_VCPUS) + block_offsets[block];
+ if (WARN_ONCE(packed_id >= KVM_MAX_VCPUS, "VCPU ID packing failed"))
+ return 0;
+ return packed_id;
+}
+
#endif /* __ASM_KVM_BOOK3S_H__ */
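For reference, a minimal user-space sketch of the packing scheme described in the comment above. MAX_SMT_THREADS and KVM_MAX_VCPUS are illustrative stand-in values here (the real ones come from the kernel headers), and the helper simply mirrors the block/offset arithmetic of kvmppc_pack_vcpu_id() without the WARN_ONCE checks:

```c
#include <stdio.h>

/* Stand-in values; real constants come from the KVM headers. */
#define MAX_SMT_THREADS	8
#define KVM_MAX_VCPUS	16

static unsigned int pack_vcpu_id(unsigned int id, int stride)
{
	const int block_offsets[MAX_SMT_THREADS] = {0, 4, 2, 6, 1, 5, 3, 7};
	int block = (id / KVM_MAX_VCPUS) * (MAX_SMT_THREADS / stride);

	return (id % KVM_MAX_VCPUS) + block_offsets[block];
}

int main(void)
{
	/* With a stride of 8, IDs 0, 16, 32 and 48 fall in blocks 0..3 and
	 * pack to 0, 4, 2 and 6 respectively - no two of them collide. */
	for (unsigned int id = 0; id < 4 * KVM_MAX_VCPUS; id += KVM_MAX_VCPUS)
		printf("id %2u -> packed %2u\n", id, pack_vcpu_id(id, 8));
	return 0;
}
```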
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index fa4efa7e88f7..906bcbdfd2a1 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -42,7 +42,14 @@
#define KVM_USER_MEM_SLOTS 512
#include <asm/cputhreads.h>
-#define KVM_MAX_VCPU_ID (threads_per_subcore * KVM_MAX_VCORES)
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+#include <asm/kvm_book3s_asm.h> /* for MAX_SMT_THREADS */
+#define KVM_MAX_VCPU_ID (MAX_SMT_THREADS * KVM_MAX_VCORES)
+
+#else
+#define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
+#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@@ -672,7 +679,7 @@ struct kvm_vcpu_arch {
gva_t vaddr_accessed;
pgd_t *pgdir;
- u8 io_gpr; /* GPR used as IO source/target */
+ u16 io_gpr; /* GPR used as IO source/target */
u8 mmio_host_swabbed;
u8 mmio_sign_extend;
/* conversion between single and double precision */
@@ -688,7 +695,6 @@ struct kvm_vcpu_arch {
*/
u8 mmio_vsx_copy_nums;
u8 mmio_vsx_offset;
- u8 mmio_vsx_tx_sx_enabled;
u8 mmio_vmx_copy_nums;
u8 mmio_vmx_offset;
u8 mmio_copy_type;
@@ -801,14 +807,14 @@ struct kvm_vcpu_arch {
#define KVMPPC_VCPU_BUSY_IN_HOST 2
/* Values for vcpu->arch.io_gpr */
-#define KVM_MMIO_REG_MASK 0x001f
-#define KVM_MMIO_REG_EXT_MASK 0xffe0
+#define KVM_MMIO_REG_MASK 0x003f
+#define KVM_MMIO_REG_EXT_MASK 0xffc0
#define KVM_MMIO_REG_GPR 0x0000
-#define KVM_MMIO_REG_FPR 0x0020
-#define KVM_MMIO_REG_QPR 0x0040
-#define KVM_MMIO_REG_FQPR 0x0060
-#define KVM_MMIO_REG_VSX 0x0080
-#define KVM_MMIO_REG_VMX 0x00c0
+#define KVM_MMIO_REG_FPR 0x0040
+#define KVM_MMIO_REG_QPR 0x0080
+#define KVM_MMIO_REG_FQPR 0x00c0
+#define KVM_MMIO_REG_VSX 0x0100
+#define KVM_MMIO_REG_VMX 0x0180
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
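A small standalone sketch of how the widened io_gpr encoding from the hunk above decodes. The constants are copied from the new defines; the sample register number and the printout are illustrative only:

```c
#include <stdio.h>

/* New layout: the low 6 bits hold the register number, the bits above
 * them select the register bank. */
#define KVM_MMIO_REG_MASK	0x003f
#define KVM_MMIO_REG_EXT_MASK	0xffc0
#define KVM_MMIO_REG_VSX	0x0100

int main(void)
{
	/* A VSX target beyond VSR31 (here VSR35) no longer loses its top
	 * bit to the old 5-bit mask, and needs the u16 io_gpr to fit. */
	unsigned int io_gpr = KVM_MMIO_REG_VSX | 35;

	printf("bank 0x%04x, register %u\n",
	       io_gpr & KVM_MMIO_REG_EXT_MASK, io_gpr & KVM_MMIO_REG_MASK);
	return 0;
}
```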
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 486b7c83b8c5..e5b314ed054e 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -163,7 +163,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
-#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
+#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index e8afdd4f4814..9a3f2646ecc7 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -179,7 +179,7 @@ extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
if ((tbltmp->it_page_shift <= stt->page_shift) &&
(tbltmp->it_offset << tbltmp->it_page_shift ==
stt->offset << stt->page_shift) &&
- (tbltmp->it_size << tbltmp->it_page_shift ==
+ (tbltmp->it_size << tbltmp->it_page_shift >=
stt->size << stt->page_shift)) {
/*
* Reference the table to avoid races with
@@ -295,7 +295,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
{
struct kvmppc_spapr_tce_table *stt = NULL;
struct kvmppc_spapr_tce_table *siter;
- unsigned long npages, size;
+ unsigned long npages, size = args->size;
int ret = -ENOMEM;
int i;
@@ -303,7 +303,6 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
return -EINVAL;
- size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 96f1cc6c97b7..574fc1dcb2bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -127,14 +127,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
* and SPURR count and should be set according to the number of
* online threads in the vcore being run.
*/
-#define RWMR_RPA_P8_1THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9
-#define RWMR_RPA_P8_3THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9
-#define RWMR_RPA_P8_5THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_6THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_7THREAD 0x164520C62609AECA
-#define RWMR_RPA_P8_8THREAD 0x164520C62609AECA
+#define RWMR_RPA_P8_1THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_2THREAD 0x7FFF2908450D8DA9UL
+#define RWMR_RPA_P8_3THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_4THREAD 0x199A421245058DA9UL
+#define RWMR_RPA_P8_5THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_6THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_7THREAD 0x164520C62609AECAUL
+#define RWMR_RPA_P8_8THREAD 0x164520C62609AECAUL
static unsigned long p8_rwmr_values[MAX_SMT_THREADS + 1] = {
RWMR_RPA_P8_1THREAD,
@@ -1807,7 +1807,7 @@ static int threads_per_vcore(struct kvm *kvm)
return threads_per_subcore;
}
-static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
+static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id)
{
struct kvmppc_vcore *vcore;
@@ -1821,7 +1821,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * kvm->arch.smt_mode;
+ vcore->first_vcpuid = id;
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -2037,12 +2037,26 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
mutex_lock(&kvm->lock);
vcore = NULL;
err = -EINVAL;
- core = id / kvm->arch.smt_mode;
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (id >= (KVM_MAX_VCPUS * kvm->arch.emul_smt_mode)) {
+ pr_devel("KVM: VCPU ID too high\n");
+ core = KVM_MAX_VCORES;
+ } else {
+ BUG_ON(kvm->arch.smt_mode != 1);
+ core = kvmppc_pack_vcpu_id(kvm, id);
+ }
+ } else {
+ core = id / kvm->arch.smt_mode;
+ }
if (core < KVM_MAX_VCORES) {
vcore = kvm->arch.vcores[core];
- if (!vcore) {
+ if (vcore && cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pr_devel("KVM: collision on id %u", id);
+ vcore = NULL;
+ } else if (!vcore) {
err = -ENOMEM;
- vcore = kvmppc_vcore_create(kvm, core);
+ vcore = kvmppc_vcore_create(kvm,
+ id & ~(kvm->arch.smt_mode - 1));
kvm->arch.vcores[core] = vcore;
kvm->arch.online_vcores++;
}
@@ -4550,6 +4564,8 @@ static int kvmppc_book3s_init_hv(void)
pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
return -ENODEV;
}
+ /* presence of intc confirmed - node can be dropped again */
+ of_node_put(np);
}
#endif
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index f9818d7d3381..126f02b3ffb8 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -317,6 +317,11 @@ static int xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
return -EBUSY;
}
+static u32 xive_vp(struct kvmppc_xive *xive, u32 server)
+{
+ return xive->vp_base + kvmppc_pack_vcpu_id(xive->kvm, server);
+}
+
static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
struct kvmppc_xive_src_block *sb,
struct kvmppc_xive_irq_state *state)
@@ -362,7 +367,7 @@ static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
MASKED, state->number);
/* set old_p so we can track if an H_EOI was done */
state->old_p = true;
@@ -418,7 +423,7 @@ static void xive_finish_unmask(struct kvmppc_xive *xive,
*/
if (xd->flags & OPAL_XIVE_IRQ_MASK_VIA_FW) {
xive_native_configure_irq(hw_num,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/* If an EOI is needed, do it here */
if (!state->old_p)
@@ -495,7 +500,7 @@ static int xive_target_interrupt(struct kvm *kvm,
kvmppc_xive_select_irq(state, &hw_num, NULL);
return xive_native_configure_irq(hw_num,
- xive->vp_base + server,
+ xive_vp(xive, server),
prio, state->number);
}
@@ -883,7 +888,7 @@ int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
* which is fine for a never started interrupt.
*/
xive_native_configure_irq(hw_irq,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -959,7 +964,7 @@ int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
/* Reconfigure the IPI */
xive_native_configure_irq(state->ipi_number,
- xive->vp_base + state->act_server,
+ xive_vp(xive, state->act_server),
state->act_priority, state->number);
/*
@@ -1084,7 +1089,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
pr_devel("Duplicate !\n");
return -EEXIST;
}
- if (cpu >= KVM_MAX_VCPUS) {
+ if (cpu >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
pr_devel("Out of bounds !\n");
return -EINVAL;
}
@@ -1098,7 +1103,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
xc->xive = xive;
xc->vcpu = vcpu;
xc->server_num = cpu;
- xc->vp_id = xive->vp_base + cpu;
+ xc->vp_id = xive_vp(xive, cpu);
xc->mfrr = 0xff;
xc->valid = true;
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index afde788be141..75dce1ef3bc8 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -106,7 +106,6 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
* if mmio_vsx_tx_sx_enabled == 1, copy data between
* VSR[32..63] and memory
*/
- vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
@@ -242,8 +241,8 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_load(run, vcpu,
- KVM_MMIO_REG_VSX | (op.reg & 0x1f),
- io_size_each, 1, op.type & SIGNEXT);
+ KVM_MMIO_REG_VSX|op.reg, io_size_each,
+ 1, op.type & SIGNEXT);
break;
}
#endif
@@ -363,7 +362,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
}
emulated = kvmppc_handle_vsx_store(run, vcpu,
- op.reg & 0x1f, io_size_each, 1);
+ op.reg, io_size_each, 1);
break;
}
#endif
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 3ccc386b380d..eba5756d5b41 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -879,10 +879,10 @@ static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[offset] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, offset) = gpr;
}
@@ -894,11 +894,11 @@ static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsxval[0] = gpr;
val.vsxval[1] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
VCPU_VSX_FPR(vcpu, index, 0) = gpr;
VCPU_VSX_FPR(vcpu, index, 1) = gpr;
@@ -911,12 +911,12 @@ static inline void kvmppc_set_vsr_word_dump(struct kvm_vcpu *vcpu,
union kvmppc_one_reg val;
int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (index >= 32) {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
val.vsx32val[2] = gpr;
val.vsx32val[3] = gpr;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
val.vsx32val[0] = gpr;
val.vsx32val[1] = gpr;
@@ -936,10 +936,10 @@ static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
if (offset == -1)
return;
- if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
- val.vval = VCPU_VSX_VR(vcpu, index);
+ if (index >= 32) {
+ val.vval = VCPU_VSX_VR(vcpu, index - 32);
val.vsx32val[offset] = gpr32;
- VCPU_VSX_VR(vcpu, index) = val.vval;
+ VCPU_VSX_VR(vcpu, index - 32) = val.vval;
} else {
dword_offset = offset / 2;
word_offset = offset % 2;
@@ -1360,10 +1360,10 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
- if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (rs < 32) {
*val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
} else {
- reg.vval = VCPU_VSX_VR(vcpu, rs);
+ reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsxval[vsx_offset];
}
break;
@@ -1377,13 +1377,13 @@ static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
break;
}
- if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ if (rs < 32) {
dword_offset = vsx_offset / 2;
word_offset = vsx_offset % 2;
reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
*val = reg.vsx32val[word_offset];
} else {
- reg.vval = VCPU_VSX_VR(vcpu, rs);
+ reg.vval = VCPU_VSX_VR(vcpu, rs - 32);
*val = reg.vsx32val[vsx_offset];
}
break;
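The index >= 32 checks above rely on the 6-bit VSX register number carrying the TX/SX bit itself: VSR0..VSR31 are backed by the FP register file, VSR32..VSR63 by the vector register file. A minimal sketch of that split, with a hypothetical vsx_decode() helper that is not part of the patch:

```c
#include <stdio.h>

/* Hypothetical helper (not in the patch) illustrating the split used by
 * the index >= 32 checks above. */
static void vsx_decode(int rs)
{
	if (rs < 32)
		printf("VSR%d -> FP register file, index %d\n", rs, rs);
	else
		printf("VSR%d -> vector register file, index %d\n", rs, rs - 32);
}

int main(void)
{
	vsx_decode(5);		/* FP side, FPR index 5    */
	vsx_decode(35);		/* vector side, VR index 3 */
	return 0;
}
```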
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a2188e309bd6..29c940bf8506 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -269,6 +269,7 @@ struct kvm_s390_sie_block {
__u8 reserved1c0[8]; /* 0x01c0 */
#define ECD_HOSTREGMGMT 0x20000000
#define ECD_MEF 0x08000000
+#define ECD_ETOKENF 0x02000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -655,6 +656,7 @@ struct kvm_vcpu_arch {
seqcount_t cputm_seqcount;
__u64 cputm_start;
bool gs_enabled;
+ bool skey_enabled;
};
struct kvm_vm_stat {
@@ -793,12 +795,6 @@ struct kvm_s390_vsie {
struct page *pages[KVM_MAX_VCPUS];
};
-struct kvm_s390_migration_state {
- unsigned long bitmap_size; /* in bits (number of guest pages) */
- atomic64_t dirty_pages; /* number of dirty pages */
- unsigned long *pgste_bitmap;
-};
-
struct kvm_arch{
void *sca;
int use_esca;
@@ -828,7 +824,8 @@ struct kvm_arch{
struct kvm_s390_vsie vsie;
u8 epdx;
u64 epoch;
- struct kvm_s390_migration_state *migration_state;
+ int migration_mode;
+ atomic64_t cmma_dirty_pages;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
struct kvm_s390_gisa *gisa;
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 4cdaa55fabfe..9a50f02b9894 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -4,7 +4,7 @@
/*
* KVM s390 specific structures and definitions
*
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008, 2018
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
* Christian Borntraeger <borntraeger@de.ibm.com>
@@ -225,6 +225,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_FPRS (1UL << 8)
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
#define SDNXL (1UL << SDNXC)
@@ -258,6 +259,8 @@ struct kvm_sync_regs {
struct {
__u64 reserved1[2];
__u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
};
};
};
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f9d90337e64a..91ad4a9425c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -906,54 +906,37 @@ static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
*/
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
struct kvm_memory_slot *ms;
- /* should be the only one */
struct kvm_memslots *slots;
- unsigned long ram_pages;
+ unsigned long ram_pages = 0;
int slotnr;
/* migration mode already enabled */
- if (kvm->arch.migration_state)
+ if (kvm->arch.migration_mode)
return 0;
-
slots = kvm_memslots(kvm);
if (!slots || !slots->used_slots)
return -EINVAL;
- mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
- if (!mgs)
- return -ENOMEM;
- kvm->arch.migration_state = mgs;
-
- if (kvm->arch.use_cmma) {
+ if (!kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 1;
+ return 0;
+ }
+ /* mark all the pages in active slots as dirty */
+ for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
+ ms = slots->memslots + slotnr;
/*
- * Get the first slot. They are reverse sorted by base_gfn, so
- * the first slot is also the one at the end of the address
- * space. We have verified above that at least one slot is
- * present.
+ * The second half of the bitmap is only used on x86,
+ * and would be wasted otherwise, so we put it to good
+ * use here to keep track of the state of the storage
+ * attributes.
*/
- ms = slots->memslots;
- /* round up so we only use full longs */
- ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
- /* allocate enough bytes to store all the bits */
- mgs->pgste_bitmap = vmalloc(ram_pages / 8);
- if (!mgs->pgste_bitmap) {
- kfree(mgs);
- kvm->arch.migration_state = NULL;
- return -ENOMEM;
- }
-
- mgs->bitmap_size = ram_pages;
- atomic64_set(&mgs->dirty_pages, ram_pages);
- /* mark all the pages in active slots as dirty */
- for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
- ms = slots->memslots + slotnr;
- bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
- }
-
- kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
+ memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
+ ram_pages += ms->npages;
}
+ atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
+ kvm->arch.migration_mode = 1;
+ kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
return 0;
}
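A toy model of the bitmap reuse described in the comment above, assuming (as the helper name suggests) that the second bitmap sits immediately after the first half of a doubled allocation; all names here are illustrative stand-ins, not the kernel's:

```c
#include <stdlib.h>
#include <string.h>

/* Toy memslot: the first half of dirty_bitmap is the normal dirty log,
 * the second half (as reused above) tracks storage-attribute state. */
struct toy_memslot {
	unsigned long npages;
	unsigned long *dirty_bitmap;	/* 2 * bitmap_bytes(npages) long */
};

static unsigned long bitmap_bytes(unsigned long npages)
{
	unsigned long bits_per_long = 8 * sizeof(unsigned long);

	return (npages + bits_per_long - 1) / bits_per_long * sizeof(unsigned long);
}

static unsigned long *second_bitmap(struct toy_memslot *ms)
{
	return ms->dirty_bitmap + bitmap_bytes(ms->npages) / sizeof(unsigned long);
}

int main(void)
{
	struct toy_memslot ms = { .npages = 1024 };

	ms.dirty_bitmap = calloc(2, bitmap_bytes(ms.npages));
	if (!ms.dirty_bitmap)
		return 1;
	/* the "mark all the pages in active slots as dirty" step from above */
	memset(second_bitmap(&ms), 0xff, bitmap_bytes(ms.npages));
	free(ms.dirty_bitmap);
	return 0;
}
```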
@@ -963,21 +946,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
*/
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
- struct kvm_s390_migration_state *mgs;
-
/* migration mode already disabled */
- if (!kvm->arch.migration_state)
+ if (!kvm->arch.migration_mode)
return 0;
- mgs = kvm->arch.migration_state;
- kvm->arch.migration_state = NULL;
-
- if (kvm->arch.use_cmma) {
+ kvm->arch.migration_mode = 0;
+ if (kvm->arch.use_cmma)
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
- /* We have to wait for the essa emulation to finish */
- synchronize_srcu(&kvm->srcu);
- vfree(mgs->pgste_bitmap);
- }
- kfree(mgs);
return 0;
}
@@ -1005,7 +979,7 @@ static int kvm_s390_vm_set_migration(struct kvm *kvm,
static int kvm_s390_vm_get_migration(struct kvm *kvm,
struct kvm_device_attr *attr)
{
- u64 mig = (kvm->arch.migration_state != NULL);
+ u64 mig = kvm->arch.migration_mode;
if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
return -ENXIO;
@@ -1653,6 +1627,134 @@ out:
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
/*
+ * Similar to gfn_to_memslot, but returns the index of a memslot also when the
+ * address falls in a hole. In that case the index of one of the memslots
+ * bordering the hole is returned.
+ */
+static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
+{
+ int start = 0, end = slots->used_slots;
+ int slot = atomic_read(&slots->lru_slot);
+ struct kvm_memory_slot *memslots = slots->memslots;
+
+ if (gfn >= memslots[slot].base_gfn &&
+ gfn < memslots[slot].base_gfn + memslots[slot].npages)
+ return slot;
+
+ while (start < end) {
+ slot = start + (end - start) / 2;
+
+ if (gfn >= memslots[slot].base_gfn)
+ end = slot;
+ else
+ start = slot + 1;
+ }
+
+ if (gfn >= memslots[start].base_gfn &&
+ gfn < memslots[start].base_gfn + memslots[start].npages) {
+ atomic_set(&slots->lru_slot, start);
+ }
+
+ return start;
+}
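A standalone sketch of the approximate lookup above, assuming memslots sorted by base_gfn in descending order (as the removed comment in kvm_s390_vm_start_migration notes, they are reverse sorted by base_gfn); types and names are simplified stand-ins, and the LRU shortcut is omitted:

```c
#include <stdio.h>

struct toy_slot {
	unsigned long base_gfn;
	unsigned long npages;
};

/* Binary search over slots sorted by base_gfn, descending; settles on a
 * bordering slot even when the gfn falls into a hole. */
static int slot_approx(const struct toy_slot *slots, int used, unsigned long gfn)
{
	int start = 0, end = used;

	while (start < end) {
		int mid = start + (end - start) / 2;

		if (gfn >= slots[mid].base_gfn)
			end = mid;
		else
			start = mid + 1;
	}
	return start;
}

int main(void)
{
	/* Two slots with a hole in between (gfns 0x200..0x3ff unmapped). */
	const struct toy_slot slots[] = {
		{ .base_gfn = 0x400, .npages = 0x100 },
		{ .base_gfn = 0x100, .npages = 0x100 },
	};

	printf("gfn 0x120 -> slot %d\n", slot_approx(slots, 2, 0x120)); /* 1 */
	printf("gfn 0x300 -> slot %d\n", slot_approx(slots, 2, 0x300)); /* 1 (hole) */
	printf("gfn 0x450 -> slot %d\n", slot_approx(slots, 2, 0x450)); /* 0 */
	return 0;
}
```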
+
+static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long pgstev, hva, cur_gfn = args->start_gfn;
+
+ args->count = 0;
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ /*
+ * We return an error if the first value was invalid, but we
+ * return successfully if at least one value was copied.
+ */
+ if (kvm_is_error_hva(hva))
+ return args->count ? 0 : -EFAULT;
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ res[args->count++] = (pgstev >> 24) & 0x43;
+ cur_gfn++;
+ }
+
+ return 0;
+}
+
+static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots,
+ unsigned long cur_gfn)
+{
+ int slotidx = gfn_to_memslot_approx(slots, cur_gfn);
+ struct kvm_memory_slot *ms = slots->memslots + slotidx;
+ unsigned long ofs = cur_gfn - ms->base_gfn;
+
+ if (ms->base_gfn + ms->npages <= cur_gfn) {
+ slotidx--;
+ /* If we are above the highest slot, wrap around */
+ if (slotidx < 0)
+ slotidx = slots->used_slots - 1;
+
+ ms = slots->memslots + slotidx;
+ ofs = 0;
+ }
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs);
+ while ((slotidx > 0) && (ofs >= ms->npages)) {
+ slotidx--;
+ ms = slots->memslots + slotidx;
+ ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0);
+ }
+ return ms->base_gfn + ofs;
+}
+
+static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
+ u8 *res, unsigned long bufsize)
+{
+ unsigned long mem_end, cur_gfn, next_gfn, hva, pgstev;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *ms;
+
+ cur_gfn = kvm_s390_next_dirty_cmma(slots, args->start_gfn);
+ ms = gfn_to_memslot(kvm, cur_gfn);
+ args->count = 0;
+ args->start_gfn = cur_gfn;
+ if (!ms)
+ return 0;
+ next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1);
+ mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
+
+ while (args->count < bufsize) {
+ hva = gfn_to_hva(kvm, cur_gfn);
+ if (kvm_is_error_hva(hva))
+ return 0;
+ /* Decrement only if we actually flipped the bit to 0 */
+ if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
+ atomic64_dec(&kvm->arch.cmma_dirty_pages);
+ if (get_pgste(kvm->mm, hva, &pgstev) < 0)
+ pgstev = 0;
+ /* Save the value */
+ res[args->c