summaryrefslogtreecommitdiffstats
path: root/arch/s390/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-02 16:11:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-02 16:11:27 -0400
commit221bb8a46e230b9824204ae86537183d9991ff2a (patch)
tree92510d72285b2285be7cb87288bf088cb28af4c1 /arch/s390/kvm
parentf7b32e4c021fd788f13f6785e17efbc3eb05b351 (diff)
parent23528bb21ee2c9b27f3feddd77a2a3351a8df148 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: - ARM: GICv3 ITS emulation and various fixes. Removal of the old VGIC implementation. - s390: support for trapping software breakpoints, nested virtualization (vSIE), the STHYI opcode, initial extensions for CPU model support. - MIPS: support for MIPS64 hosts (32-bit guests only) and lots of cleanups, preliminary to this and the upcoming support for hardware virtualization extensions. - x86: support for execute-only mappings in nested EPT; reduced vmexit latency for TSC deadline timer (by about 30%) on Intel hosts; support for more than 255 vCPUs. - PPC: bugfixes. * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits) KVM: PPC: Introduce KVM_CAP_PPC_HTM MIPS: Select HAVE_KVM for MIPS64_R{2,6} MIPS: KVM: Reset CP0_PageMask during host TLB flush MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX() MIPS: KVM: Sign extend MFC0/RDHWR results MIPS: KVM: Fix 64-bit big endian dynamic translation MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase MIPS: KVM: Use 64-bit CP0_EBase when appropriate MIPS: KVM: Set CP0_Status.KX on MIPS64 MIPS: KVM: Make entry code MIPS64 friendly MIPS: KVM: Use kmap instead of CKSEG0ADDR() MIPS: KVM: Use virt_to_phys() to get commpage PFN MIPS: Fix definition of KSEGX() for 64-bit KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD kvm: x86: nVMX: maintain internal copy of current VMCS KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures KVM: arm64: vgic-its: Simplify MAPI error handling KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers KVM: arm64: vgic-its: Turn device_id validation into generic ID validation ...
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c5
-rw-r--r--arch/s390/kvm/gaccess.c387
-rw-r--r--arch/s390/kvm/gaccess.h3
-rw-r--r--arch/s390/kvm/guestdbg.c19
-rw-r--r--arch/s390/kvm/intercept.c33
-rw-r--r--arch/s390/kvm/interrupt.c34
-rw-r--r--arch/s390/kvm/kvm-s390.c404
-rw-r--r--arch/s390/kvm/kvm-s390.h22
-rw-r--r--arch/s390/kvm/priv.c226
-rw-r--r--arch/s390/kvm/sigp.c10
-rw-r--r--arch/s390/kvm/sthyi.c471
-rw-r--r--arch/s390/kvm/trace.h49
-rw-r--r--arch/s390/kvm/vsie.c1091
14 files changed, 2573 insertions, 183 deletions
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d42fa38c2429..09a9e6dfc09f 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o
+kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1ea4095b67d7..ce865bd4f81d 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
return -EOPNOTSUPP;
+ VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+ (u32) vcpu->run->s.regs.gprs[2],
+ (u32) vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
/*
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 66938d283b77..54200208bf24 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -8,6 +8,7 @@
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <asm/pgtable.h>
+#include <asm/gmap.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include <asm/switch_to.h>
@@ -476,18 +477,73 @@ enum {
FSI_FETCH = 2 /* Exception was due to fetch operation */
};
-static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, enum gacc_mode mode)
+enum prot_type {
+ PROT_TYPE_LA = 0,
+ PROT_TYPE_KEYC = 1,
+ PROT_TYPE_ALC = 2,
+ PROT_TYPE_DAT = 3,
+};
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
+ ar_t ar, enum gacc_mode mode, enum prot_type prot)
{
- int rc;
- struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec_bits;
+ struct trans_exc_code_bits *tec;
memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw.as;
+ pgm->code = code;
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+ switch (code) {
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ /*
+ * op_access_id only applies to MOVE_PAGE -> set bit 61
+ * exc_access_id has to be set to 0 for some instructions. Both
+ * cases have to be handled by the caller. We can always store
+ * exc_access_id, as it is undefined for non-ar cases.
+ */
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* FALL THROUGH */
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm->exc_access_id = ar;
+ break;
+ case PGM_PROTECTION:
+ switch (prot) {
+ case PROT_TYPE_ALC:
+ tec->b60 = 1;
+ /* FALL THROUGH */
+ case PROT_TYPE_DAT:
+ tec->b61 = 1;
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* exc_access_id is undefined for most cases */
+ pgm->exc_access_id = ar;
+ break;
+ default: /* LA and KEYC set b61 to 0, other params undefined */
+ break;
+ }
+ break;
+ }
+ return code;
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+ unsigned long ga, ar_t ar, enum gacc_mode mode)
+{
+ int rc;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
if (!psw.t) {
asce->val = 0;
@@ -510,21 +566,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
return 0;
case PSW_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, mode);
- switch (rc) {
- case PGM_ALEN_TRANSLATION:
- case PGM_ALE_SEQUENCE:
- case PGM_ASTE_VALIDITY:
- case PGM_ASTE_SEQUENCE:
- case PGM_EXTENDED_AUTHORITY:
- vcpu->arch.pgm.exc_access_id = ar;
- break;
- case PGM_PROTECTION:
- tec_bits->b60 = 1;
- tec_bits->b61 = 1;
- break;
- }
if (rc > 0)
- pgm->code = rc;
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
return rc;
}
return 0;
@@ -729,40 +772,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
unsigned long *pages, unsigned long nr_pages,
const union asce asce, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
- int lap_enabled, rc;
+ int lap_enabled, rc = 0;
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
- tec_bits->addr = ga >> PAGE_SHIFT;
- if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
- pgm->code = PGM_PROTECTION;
- return pgm->code;
- }
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+ return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+ PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
- if (rc == PGM_PROTECTION)
- tec_bits->b61 = 1;
- if (rc)
- pgm->code = rc;
} else {
*pages = kvm_s390_real_to_abs(vcpu, ga);
if (kvm_is_error_gpa(vcpu->kvm, *pages))
- pgm->code = PGM_ADDRESSING;
+ rc = PGM_ADDRESSING;
}
- if (pgm->code)
- return pgm->code;
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -783,7 +817,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -795,7 +830,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
+ rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -846,37 +881,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long *gpa, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec;
union asce asce;
int rc;
gva = kvm_s390_logical_to_effective(vcpu, gva);
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
- tec->addr = gva >> PAGE_SHIFT;
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE) {
- rc = pgm->code = PGM_PROTECTION;
- return rc;
- }
+ if (mode == GACC_STORE)
+ return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
+ mode, PROT_TYPE_LA);
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, mode);
- if (rc > 0) {
- if (rc == PGM_PROTECTION)
- tec->b61 = 1;
- pgm->code = rc;
- }
+ if (rc > 0)
+ return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
} else {
- rc = 0;
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- rc = pgm->code = PGM_ADDRESSING;
+ return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
}
return rc;
@@ -915,20 +941,247 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
*/
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
if (!ctlreg0.lap || !is_low_address(gra))
return 0;
+ return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
+}
- memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = FSI_STORE;
- tec_bits->as = psw_bits(*psw).as;
- tec_bits->addr = gra >> PAGE_SHIFT;
- pgm->code = PGM_PROTECTION;
+/**
+ * kvm_s390_shadow_tables - walk the guest page table and create shadow tables
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ * @pgt: pointer to the page table address result
+ * @fake: pgt references contiguous guest memory block, not a pgtable
+ */
+static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
+ unsigned long *pgt, int *dat_protection,
+ int *fake)
+{
+ struct gmap *parent;
+ union asce asce;
+ union vaddress vaddr;
+ unsigned long ptr;
+ int rc;
+
+ *fake = 0;
+ *dat_protection = 0;
+ parent = sg->parent;
+ vaddr.addr = saddr;
+ asce.val = sg->orig_asce;
+ ptr = asce.origin * 4096;
+ if (asce.r) {
+ *fake = 1;
+ asce.dt = ASCE_TYPE_REGION1;
+ }
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl && !asce.r)
+ return PGM_REGION_FIRST_TRANS;
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ break;
+ }
+
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
- return pgm->code;
+ if (*fake) {
+ /* offset in 16EB guest memory block */
+ ptr = ptr + ((unsigned long) vaddr.rsx << 53UL);
+ rfte.val = ptr;
+ goto shadow_r2t;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
+ if (rc)
+ return rc;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rfte.p;
+ ptr = rfte.rto << 12UL;
+shadow_r2t:
+ rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake);
+ if (rc)
+ return rc;
+ /* fallthrough */
+ }
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (*fake) {
+ /* offset in 8PB guest memory block */
+ ptr = ptr + ((unsigned long) vaddr.rtx << 42UL);
+ rste.val = ptr;
+ goto shadow_r3t;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
+ if (rc)
+ return rc;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rste.p;
+ ptr = rste.rto << 12UL;
+shadow_r3t:
+ rste.p |= *dat_protection;
+ rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake);
+ if (rc)
+ return rc;
+ /* fallthrough */
+ }
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (*fake) {
+ /* offset in 4TB guest memory block */
+ ptr = ptr + ((unsigned long) vaddr.sx << 31UL);
+ rtte.val = ptr;
+ goto shadow_sgt;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
+ if (rc)
+ return rc;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && sg->edat_level >= 2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && sg->edat_level >= 2) {
+ *dat_protection |= rtte.fc0.p;
+ *fake = 1;
+ ptr = rtte.fc1.rfaa << 31UL;
+ rtte.val = ptr;
+ goto shadow_sgt;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (sg->edat_level >= 1)
+ *dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto << 12UL;
+shadow_sgt:
+ rtte.fc0.p |= *dat_protection;
+ rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake);
+ if (rc)
+ return rc;
+ /* fallthrough */
+ }
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (*fake) {
+ /* offset in 2G guest memory block */
+ ptr = ptr + ((unsigned long) vaddr.sx << 20UL);
+ ste.val = ptr;
+ goto shadow_pgt;
+ }
+ rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
+ if (rc)
+ return rc;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ *dat_protection |= ste.fc0.p;
+ if (ste.fc && sg->edat_level >= 1) {
+ *fake = 1;
+ ptr = ste.fc1.sfaa << 20UL;
+ ste.val = ptr;
+ goto shadow_pgt;
+ }
+ ptr = ste.fc0.pto << 11UL;
+shadow_pgt:
+ ste.fc0.p |= *dat_protection;
+ rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake);
+ if (rc)
+ return rc;
+ }
+ }
+ /* Return the parent address of the page table */
+ *pgt = ptr;
+ return 0;
+}
+
+/**
+ * kvm_s390_shadow_fault - handle fault on a shadow page table
+ * @vcpu: virtual cpu
+ * @sg: pointer to the shadow guest address space structure
+ * @saddr: faulting address in the shadow gmap
+ *
+ * Returns: - 0 if the shadow fault was successfully resolved
+ * - > 0 (pgm exception code) on exceptions while faulting
+ * - -EAGAIN if the caller can retry immediately
+ * - -EFAULT when accessing invalid guest addresses
+ * - -ENOMEM if out of memory
+ */
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
+ unsigned long saddr)
+{
+ union vaddress vaddr;
+ union page_table_entry pte;
+ unsigned long pgt;
+ int dat_protection, fake;
+ int rc;
+
+ down_read(&sg->mm->mmap_sem);
+ /*
+ * We don't want any guest-2 tables to change - so the parent
+ * tables/pointers we read stay valid - unshadowing is however
+ * always possible - only guest_table_lock protects us.
+ */
+ ipte_lock(vcpu);
+
+ rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+ if (rc)
+ rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
+ &fake);
+
+ vaddr.addr = saddr;
+ if (fake) {
+ /* offset in 1MB guest memory block */
+ pte.val = pgt + ((unsigned long) vaddr.px << 12UL);
+ goto shadow_page;
+ }
+ if (!rc)
+ rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+ if (!rc && pte.i)
+ rc = PGM_PAGE_TRANSLATION;
+ if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
+ rc = PGM_TRANSLATION_SPEC;
+shadow_page:
+ pte.p |= dat_protection;
+ if (!rc)
+ rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+ ipte_unlock(vcpu);
+ up_read(&sg->mm->mmap_sem);
+ return rc;
}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index df0a79dd8159..8756569ad938 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -361,4 +361,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
+int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
+ unsigned long saddr);
+
#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index e8c6843b9600..31a05330d11c 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -439,6 +439,23 @@ exit_required:
#define guest_per_enabled(vcpu) \
(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER)
+int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu)
+{
+ const u8 ilen = kvm_s390_get_ilen(vcpu);
+ struct kvm_s390_pgm_info pgm_info = {
+ .code = PGM_PER,
+ .per_code = PER_EVENT_IFETCH >> 24,
+ .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen),
+ };
+
+ /*
+ * The PSW points to the next instruction, therefore the intercepted
+ * instruction generated a PER i-fetch event. PER address therefore
+ * points at the previous PSW address (could be an EXECUTE function).
+ */
+ return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
+}
+
static void filter_guest_per_event(struct kvm_vcpu *vcpu)
{
u32 perc = vcpu->arch.sie_block->perc << 24;
@@ -465,7 +482,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu)
guest_perc &= ~PER_EVENT_IFETCH;
/* All other PER events will be given to the guest */
- /* TODO: Check alterated address/address space */
+ /* TODO: Check altered address/address space */
vcpu->arch.sie_block->perc = guest_perc >> 24;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 252157181302..dfd0ca2638fa 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -351,8 +351,26 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_operation_exception++;
+ trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
+
+ if (vcpu->arch.sie_block->ipa == 0xb256 &&
+ test_kvm_facility(vcpu->kvm, 74))
+ return handle_sthyi(vcpu);
+
+ if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
+ return -EOPNOTSUPP;
+
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
+ int rc, per_rc = 0;
+
if (kvm_is_ucontrol(vcpu->kvm))
return -EOPNOTSUPP;
@@ -361,7 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
case 0x18:
return handle_noop(vcpu);
case 0x04:
- return handle_instruction(vcpu);
+ rc = handle_instruction(vcpu);
+ break;
case 0x08:
return handle_prog(vcpu);
case 0x14:
@@ -372,9 +391,19 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return handle_validity(vcpu);
case 0x28:
return handle_stop(vcpu);
+ case 0x2c:
+ rc = handle_operexc(vcpu);
+ break;
case 0x38:
- return handle_partial_execution(vcpu);
+ rc = handle_partial_execution(vcpu);
+ break;
default:
return -EOPNOTSUPP;
}
+
+ /* process PER, also if the instrution is processed in user space */
+ if (vcpu->arch.sie_block->icptstatus & 0x02 &&
+ (!rc || rc == -EOPNOTSUPP))
+ per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
+ return per_rc ? per_rc : rc;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5a80af740d3e..24524c0f3ef8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,6 @@
#include "gaccess.h"
#include "trace-s390.h"
-#define IOINT_SCHID_MASK 0x0000ffff
-#define IOINT_SSID_MASK 0x00030000
-#define IOINT_CSSID_MASK 0x03fc0000
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
@@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info,
list);
if (inti) {
- VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+ else
+ VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
+
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
@@ -991,6 +995,11 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
swake_up(&vcpu->wq);
vcpu->stat.halt_wakeup++;
}
+ /*
+ * The VCPU might not be sleeping but is executing the VSIE. Let's
+ * kick it, so it leaves the SIE to process the request.
+ */
+ kvm_s390_vsie_kick(vcpu);
}
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
@@ -1415,6 +1424,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
fi->counters[FIRQ_CNTR_IO] += 1;
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+ else
+ VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
@@ -1531,13 +1547,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
inti->mchk.mcic = s390int->parm64;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (inti->type & KVM_S390_INT_IO_AI_MASK)
- VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
- else
- VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
- s390int->type & IOINT_CSSID_MASK,
- s390int->type & IOINT_SSID_MASK,
- s390int->type & IOINT_SCHID_MASK);
inti->io.subchannel_id = s390int->parm >> 16;
inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
inti->io.io_int_parm = s390int->parm64 >> 32;
@@ -2237,7 +2246,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
return ret;
}
-int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int ret;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6f5c344cd785..3f3ae4865d57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -21,11 +21,13 @@
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <linux/mman.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/stp.h>
@@ -35,6 +37,8 @@
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -64,6 +68,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_pei", VCPU_STAT(exit_pei) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
@@ -94,6 +99,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
+ { "instruction_sie", VCPU_STAT(instruction_sie) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -119,6 +126,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
+/* allow nested virtualization in KVM (if enabled by user space) */
+static int nested;
+module_param(nested, int, S_IRUGO);
+MODULE_PARM_DESC(nested, "Nested virtualization support");
+
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[16] = {
0xffe6000000000000UL,
@@ -131,7 +143,13 @@ unsigned long kvm_s390_fac_list_mask_size(void)
return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
static struct gmap_notifier gmap_notifier;
+static struct gmap_notifier vsie_gmap_notifier;
debug_info_t *kvm_s390_dbf;
/* Section: not file related */
@@ -141,7 +159,8 @@ int kvm_arch_hardware_enable(void)
return 0;
}
-static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
+static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
+ unsigned long end);
/*
* This callback is executed during stop_machine(). All CPUs are therefore
@@ -163,6 +182,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
vcpu->arch.sie_block->epoch -= *delta;
if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start += *delta;
+ if (vcpu->arch.vsie_block)
+ vcpu->arch.vsie_block->epoch -= *delta;
}
}
return NOTIFY_OK;
@@ -175,7 +196,9 @@ static struct notifier_block kvm_clock_notifier = {
int kvm_arch_hardware_setup(void)
{
gmap_notifier.notifier_call = kvm_gmap_notifier;
- gmap_register_ipte_notifier(&gmap_notifier);
+ gmap_register_pte_notifier(&gmap_notifier);
+ vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
+ gmap_register_pte_notifier(&vsie_gmap_notifier);
atomic_notifier_chain_register(&s390_epoch_delta_notifier,
&kvm_clock_notifier);
return 0;
@@ -183,11 +206,109 @@ int kvm_arch_hardware_setup(void)
void kvm_arch_hardware_unsetup(void)
{
- gmap_unregister_ipte_notifier(&gmap_notifier);
+ gmap_unregister_pte_notifier(&gmap_notifier);
+ gmap_unregister_pte_notifier(&vsie_gmap_notifier);
atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
&kvm_clock_notifier);
}
+static void allow_cpu_feat(unsigned long nr)
+{
+ set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+ register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+ int cc = 3; /* subfunction not available */
+
+ asm volatile(
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : "d" (r0)
+ : "cc");
+ return cc == 0;
+}
+
+static void kvm_s390_cpu_feat_init(void)
+{
+ int i;
+
+ for (i = 0; i < 256; ++i) {
+ if (plo_test_bit(i))
+ kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+ }
+
+ if (test_facility(28)) /* TOD-clock steering */
+ ptff(kvm_s390_available_subfunc.ptff,
+ sizeof(kvm_s390_available_subfunc.ptff),
+ PTFF_QAF);
+
+ if (test_facility(17)) { /* MSA */
+ __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
+ __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
+ __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
+ __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
+ __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+ }
+ if (test_facility(76)) /* MSA3 */
+ __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+ if (test_facility(77)) { /* MSA4 */
+ __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
+ __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
+ __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
+ __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+ }
+ if (test_facility(57)) /* MSA5 */
+ __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+
+ if (MACHINE_HAS_ESOP)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+ /*
+ * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
+ * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
+ */
+ if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
+ !test_facility(3) || !nested)
+ return;
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
+ if (sclp.has_64bscao)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
+ if (sclp.has_siif)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
+ if (sclp.has_gpere)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
+ if (sclp.has_gsls)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
+ if (sclp.has_ib)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
+ if (sclp.has_cei)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
+ if (sclp.has_ibs)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+ /*
+ * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
+ * all skey handling functions read/set the skey from the PGSTE
+ * instead of the real storage key.
+ *
+ * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make
+ * pages being detected as preserved although they are resident.
+ *
+ * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
+ * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
+ *
+ * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
+ * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
+ * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
+ *
+ * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We