diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-02 16:11:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-08-02 16:11:27 -0400 |
commit | 221bb8a46e230b9824204ae86537183d9991ff2a (patch) | |
tree | 92510d72285b2285be7cb87288bf088cb28af4c1 /arch/s390/kvm | |
parent | f7b32e4c021fd788f13f6785e17efbc3eb05b351 (diff) | |
parent | 23528bb21ee2c9b27f3feddd77a2a3351a8df148 (diff) |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
- ARM: GICv3 ITS emulation and various fixes. Removal of the
old VGIC implementation.
- s390: support for trapping software breakpoints, nested
virtualization (vSIE), the STHYI opcode, initial extensions
for CPU model support.
- MIPS: support for MIPS64 hosts (32-bit guests only) and lots
of cleanups, preliminary to this and the upcoming support for
hardware virtualization extensions.
- x86: support for execute-only mappings in nested EPT; reduced
vmexit latency for TSC deadline timer (by about 30%) on Intel
hosts; support for more than 255 vCPUs.
- PPC: bugfixes.
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (302 commits)
KVM: PPC: Introduce KVM_CAP_PPC_HTM
MIPS: Select HAVE_KVM for MIPS64_R{2,6}
MIPS: KVM: Reset CP0_PageMask during host TLB flush
MIPS: KVM: Fix ptr->int cast via KVM_GUEST_KSEGX()
MIPS: KVM: Sign extend MFC0/RDHWR results
MIPS: KVM: Fix 64-bit big endian dynamic translation
MIPS: KVM: Fail if ebase doesn't fit in CP0_EBase
MIPS: KVM: Use 64-bit CP0_EBase when appropriate
MIPS: KVM: Set CP0_Status.KX on MIPS64
MIPS: KVM: Make entry code MIPS64 friendly
MIPS: KVM: Use kmap instead of CKSEG0ADDR()
MIPS: KVM: Use virt_to_phys() to get commpage PFN
MIPS: Fix definition of KSEGX() for 64-bit
KVM: VMX: Add VMCS to CPU's loaded VMCSs before VMPTRLD
kvm: x86: nVMX: maintain internal copy of current VMCS
KVM: PPC: Book3S HV: Save/restore TM state in H_CEDE
KVM: PPC: Book3S HV: Pull out TM state save/restore into separate procedures
KVM: arm64: vgic-its: Simplify MAPI error handling
KVM: arm64: vgic-its: Make vgic_its_cmd_handle_mapi similar to other handlers
KVM: arm64: vgic-its: Turn device_id validation into generic ID validation
...
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r-- | arch/s390/kvm/Makefile | 2 | ||||
-rw-r--r-- | arch/s390/kvm/diag.c | 5 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.c | 387 | ||||
-rw-r--r-- | arch/s390/kvm/gaccess.h | 3 | ||||
-rw-r--r-- | arch/s390/kvm/guestdbg.c | 19 | ||||
-rw-r--r-- | arch/s390/kvm/intercept.c | 33 | ||||
-rw-r--r-- | arch/s390/kvm/interrupt.c | 34 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.c | 404 | ||||
-rw-r--r-- | arch/s390/kvm/kvm-s390.h | 22 | ||||
-rw-r--r-- | arch/s390/kvm/priv.c | 226 | ||||
-rw-r--r-- | arch/s390/kvm/sigp.c | 10 | ||||
-rw-r--r-- | arch/s390/kvm/sthyi.c | 471 | ||||
-rw-r--r-- | arch/s390/kvm/trace.h | 49 | ||||
-rw-r--r-- | arch/s390/kvm/vsie.c | 1091 |
14 files changed, 2573 insertions, 183 deletions
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index d42fa38c2429..09a9e6dfc09f 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o +kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o vsie.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1ea4095b67d7..ce865bd4f81d 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) return -EOPNOTSUPP; + VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx", + (u32) vcpu->run->s.regs.gprs[2], + (u32) vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + /* * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 66938d283b77..54200208bf24 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -8,6 +8,7 @@ #include <linux/vmalloc.h> #include <linux/err.h> #include <asm/pgtable.h> +#include <asm/gmap.h> #include "kvm-s390.h" #include "gaccess.h" #include <asm/switch_to.h> @@ -476,18 +477,73 @@ enum { FSI_FETCH = 2 /* Exception was due to fetch operation */ }; -static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, enum gacc_mode mode) +enum prot_type { + PROT_TYPE_LA = 0, + PROT_TYPE_KEYC = 1, + PROT_TYPE_ALC = 2, + PROT_TYPE_DAT = 3, +}; + +static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, + ar_t ar, enum gacc_mode mode, enum prot_type prot) { - int rc; - struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec_bits; + struct trans_exc_code_bits *tec; memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw.as; + pgm->code = code; + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + + switch (code) { + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + /* + * op_access_id only applies to MOVE_PAGE -> set bit 61 + * exc_access_id has to be set to 0 for some instructions. Both + * cases have to be handled by the caller. We can always store + * exc_access_id, as it is undefined for non-ar cases. + */ + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* FALL THROUGH */ + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + pgm->exc_access_id = ar; + break; + case PGM_PROTECTION: + switch (prot) { + case PROT_TYPE_ALC: + tec->b60 = 1; + /* FALL THROUGH */ + case PROT_TYPE_DAT: + tec->b61 = 1; + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* exc_access_id is undefined for most cases */ + pgm->exc_access_id = ar; + break; + default: /* LA and KEYC set b61 to 0, other params undefined */ + break; + } + break; + } + return code; +} + +static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, + unsigned long ga, ar_t ar, enum gacc_mode mode) +{ + int rc; + struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); if (!psw.t) { asce->val = 0; @@ -510,21 +566,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, return 0; case PSW_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); - switch (rc) { - case PGM_ALEN_TRANSLATION: - case PGM_ALE_SEQUENCE: - case PGM_ASTE_VALIDITY: - case PGM_ASTE_SEQUENCE: - case PGM_EXTENDED_AUTHORITY: - vcpu->arch.pgm.exc_access_id = ar; - break; - case PGM_PROTECTION: - tec_bits->b60 = 1; - tec_bits->b61 = 1; - break; - } if (rc > 0) - pgm->code = rc; + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); return rc; } return 0; @@ -729,40 +772,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, unsigned long *pages, unsigned long nr_pages, const union asce asce, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; - int lap_enabled, rc; + int lap_enabled, rc = 0; - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); - tec_bits->addr = ga >> PAGE_SHIFT; - if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { - pgm->code = PGM_PROTECTION; - return pgm->code; - } + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) + return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, + PROT_TYPE_LA); ga &= PAGE_MASK; if (psw_bits(*psw).t) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; - if (rc == PGM_PROTECTION) - tec_bits->b61 = 1; - if (rc) - pgm->code = rc; } else { *pages = kvm_s390_real_to_abs(vcpu, ga); if (kvm_is_error_gpa(vcpu->kvm, *pages)) - pgm->code = PGM_ADDRESSING; + rc = PGM_ADDRESSING; } - if (pgm->code) - return pgm->code; + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); ga += PAGE_SIZE; pages++; nr_pages--; @@ -783,7 +817,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + ga = kvm_s390_logical_to_effective(vcpu, ga); + rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -795,7 +830,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); + rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); @@ -846,37 +881,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec; union asce asce; int rc; gva = kvm_s390_logical_to_effective(vcpu, gva); - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); - tec->addr = gva >> PAGE_SHIFT; + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) { - rc = pgm->code = PGM_PROTECTION; - return rc; - } + if (mode == GACC_STORE) + return trans_exc(vcpu, PGM_PROTECTION, gva, 0, + mode, PROT_TYPE_LA); } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); - if (rc > 0) { - if (rc == PGM_PROTECTION) - tec->b61 = 1; - pgm->code = rc; - } + if (rc > 0) + return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); } else { - rc = 0; *gpa = kvm_s390_real_to_abs(vcpu, gva); if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - rc = pgm->code = PGM_ADDRESSING; + return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); } return rc; @@ -915,20 +941,247 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, */ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; if (!ctlreg0.lap || !is_low_address(gra)) return 0; + return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA); +} - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = FSI_STORE; - tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = gra >> PAGE_SHIFT; - pgm->code = PGM_PROTECTION; +/** + * kvm_s390_shadow_tables - walk the guest page table and create shadow tables + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * @pgt: pointer to the page table address result + * @fake: pgt references contiguous guest memory block, not a pgtable + */ +static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr, + unsigned long *pgt, int *dat_protection, + int *fake) +{ + struct gmap *parent; + union asce asce; + union vaddress vaddr; + unsigned long ptr; + int rc; + + *fake = 0; + *dat_protection = 0; + parent = sg->parent; + vaddr.addr = saddr; + asce.val = sg->orig_asce; + ptr = asce.origin * 4096; + if (asce.r) { + *fake = 1; + asce.dt = ASCE_TYPE_REGION1; + } + switch (asce.dt) { + case ASCE_TYPE_REGION1: + if (vaddr.rfx01 > asce.tl && !asce.r) + return PGM_REGION_FIRST_TRANS; + break; + case ASCE_TYPE_REGION2: + if (vaddr.rfx) + return PGM_ASCE_TYPE; + if (vaddr.rsx01 > asce.tl) + return PGM_REGION_SECOND_TRANS; + break; + case ASCE_TYPE_REGION3: + if (vaddr.rfx || vaddr.rsx) + return PGM_ASCE_TYPE; + if (vaddr.rtx01 > asce.tl) + return PGM_REGION_THIRD_TRANS; + break; + case ASCE_TYPE_SEGMENT: + if (vaddr.rfx || vaddr.rsx || vaddr.rtx) + return PGM_ASCE_TYPE; + if (vaddr.sx01 > asce.tl) + return PGM_SEGMENT_TRANSLATION; + break; + } + + switch (asce.dt) { + case ASCE_TYPE_REGION1: { + union region1_table_entry rfte; - return pgm->code; + if (*fake) { + /* offset in 16EB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.rsx << 53UL); + rfte.val = ptr; + goto shadow_r2t; + } + rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val); + if (rc) + return rc; + if (rfte.i) + return PGM_REGION_FIRST_TRANS; + if (rfte.tt != TABLE_TYPE_REGION1) + return PGM_TRANSLATION_SPEC; + if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl) + return PGM_REGION_SECOND_TRANS; + if (sg->edat_level >= 1) + *dat_protection |= rfte.p; + ptr = rfte.rto << 12UL; +shadow_r2t: + rc = gmap_shadow_r2t(sg, saddr, rfte.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_REGION2: { + union region2_table_entry rste; + + if (*fake) { + /* offset in 8PB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.rtx << 42UL); + rste.val = ptr; + goto shadow_r3t; + } + rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val); + if (rc) + return rc; + if (rste.i) + return PGM_REGION_SECOND_TRANS; + if (rste.tt != TABLE_TYPE_REGION2) + return PGM_TRANSLATION_SPEC; + if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl) + return PGM_REGION_THIRD_TRANS; + if (sg->edat_level >= 1) + *dat_protection |= rste.p; + ptr = rste.rto << 12UL; +shadow_r3t: + rste.p |= *dat_protection; + rc = gmap_shadow_r3t(sg, saddr, rste.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_REGION3: { + union region3_table_entry rtte; + + if (*fake) { + /* offset in 4TB guest memory block */ + ptr = ptr + ((unsigned long) vaddr.sx << 31UL); + rtte.val = ptr; + goto shadow_sgt; + } + rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val); + if (rc) + return rc; + if (rtte.i) + return PGM_REGION_THIRD_TRANS; + if (rtte.tt != TABLE_TYPE_REGION3) + return PGM_TRANSLATION_SPEC; + if (rtte.cr && asce.p && sg->edat_level >= 2) + return PGM_TRANSLATION_SPEC; + if (rtte.fc && sg->edat_level >= 2) { + *dat_protection |= rtte.fc0.p; + *fake = 1; + ptr = rtte.fc1.rfaa << 31UL; + rtte.val = ptr; + goto shadow_sgt; + } + if (vaddr.sx01 < rtte.fc0.tf || vaddr.sx01 > rtte.fc0.tl) + return PGM_SEGMENT_TRANSLATION; + if (sg->edat_level >= 1) + *dat_protection |= rtte.fc0.p; + ptr = rtte.fc0.sto << 12UL; +shadow_sgt: + rtte.fc0.p |= *dat_protection; + rc = gmap_shadow_sgt(sg, saddr, rtte.val, *fake); + if (rc) + return rc; + /* fallthrough */ + } + case ASCE_TYPE_SEGMENT: { + union segment_table_entry ste; + + if (*fake) { + /* offset in 2G guest memory block */ + ptr = ptr + ((unsigned long) vaddr.sx << 20UL); + ste.val = ptr; + goto shadow_pgt; + } + rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val); + if (rc) + return rc; + if (ste.i) + return PGM_SEGMENT_TRANSLATION; + if (ste.tt != TABLE_TYPE_SEGMENT) + return PGM_TRANSLATION_SPEC; + if (ste.cs && asce.p) + return PGM_TRANSLATION_SPEC; + *dat_protection |= ste.fc0.p; + if (ste.fc && sg->edat_level >= 1) { + *fake = 1; + ptr = ste.fc1.sfaa << 20UL; + ste.val = ptr; + goto shadow_pgt; + } + ptr = ste.fc0.pto << 11UL; +shadow_pgt: + ste.fc0.p |= *dat_protection; + rc = gmap_shadow_pgt(sg, saddr, ste.val, *fake); + if (rc) + return rc; + } + } + /* Return the parent address of the page table */ + *pgt = ptr; + return 0; +} + +/** + * kvm_s390_shadow_fault - handle fault on a shadow page table + * @vcpu: virtual cpu + * @sg: pointer to the shadow guest address space structure + * @saddr: faulting address in the shadow gmap + * + * Returns: - 0 if the shadow fault was successfully resolved + * - > 0 (pgm exception code) on exceptions while faulting + * - -EAGAIN if the caller can retry immediately + * - -EFAULT when accessing invalid guest addresses + * - -ENOMEM if out of memory + */ +int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg, + unsigned long saddr) +{ + union vaddress vaddr; + union page_table_entry pte; + unsigned long pgt; + int dat_protection, fake; + int rc; + + down_read(&sg->mm->mmap_sem); + /* + * We don't want any guest-2 tables to change - so the parent + * tables/pointers we read stay valid - unshadowing is however + * always possible - only guest_table_lock protects us. + */ + ipte_lock(vcpu); + + rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake); + if (rc) + rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection, + &fake); + + vaddr.addr = saddr; + if (fake) { + /* offset in 1MB guest memory block */ + pte.val = pgt + ((unsigned long) vaddr.px << 12UL); + goto shadow_page; + } + if (!rc) + rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val); + if (!rc && pte.i) + rc = PGM_PAGE_TRANSLATION; + if (!rc && (pte.z || (pte.co && sg->edat_level < 1))) + rc = PGM_TRANSLATION_SPEC; +shadow_page: + pte.p |= dat_protection; + if (!rc) + rc = gmap_shadow_page(sg, saddr, __pte(pte.val)); + ipte_unlock(vcpu); + up_read(&sg->mm->mmap_sem); + return rc; } diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index df0a79dd8159..8756569ad938 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -361,4 +361,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); +int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow, + unsigned long saddr); + #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index e8c6843b9600..31a05330d11c 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -439,6 +439,23 @@ exit_required: #define guest_per_enabled(vcpu) \ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) +int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) +{ + const u8 ilen = kvm_s390_get_ilen(vcpu); + struct kvm_s390_pgm_info pgm_info = { + .code = PGM_PER, + .per_code = PER_EVENT_IFETCH >> 24, + .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen), + }; + + /* + * The PSW points to the next instruction, therefore the intercepted + * instruction generated a PER i-fetch event. PER address therefore + * points at the previous PSW address (could be an EXECUTE function). + */ + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); +} + static void filter_guest_per_event(struct kvm_vcpu *vcpu) { u32 perc = vcpu->arch.sie_block->perc << 24; @@ -465,7 +482,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) guest_perc &= ~PER_EVENT_IFETCH; /* All other PER events will be given to the guest */ - /* TODO: Check alterated address/address space */ + /* TODO: Check altered address/address space */ vcpu->arch.sie_block->perc = guest_perc >> 24; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 252157181302..dfd0ca2638fa 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -351,8 +351,26 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_operexc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_operation_exception++; + trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); + + if (vcpu->arch.sie_block->ipa == 0xb256 && + test_kvm_facility(vcpu->kvm, 74)) + return handle_sthyi(vcpu); + + if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) + return -EOPNOTSUPP; + + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + int rc, per_rc = 0; + if (kvm_is_ucontrol(vcpu->kvm)) return -EOPNOTSUPP; @@ -361,7 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) case 0x18: return handle_noop(vcpu); case 0x04: - return handle_instruction(vcpu); + rc = handle_instruction(vcpu); + break; case 0x08: return handle_prog(vcpu); case 0x14: @@ -372,9 +391,19 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return handle_validity(vcpu); case 0x28: return handle_stop(vcpu); + case 0x2c: + rc = handle_operexc(vcpu); + break; case 0x38: - return handle_partial_execution(vcpu); + rc = handle_partial_execution(vcpu); + break; default: return -EOPNOTSUPP; } + + /* process PER, also if the instrution is processed in user space */ + if (vcpu->arch.sie_block->icptstatus & 0x02 && + (!rc || rc == -EOPNOTSUPP)) + per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu); + return per_rc ? per_rc : rc; } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5a80af740d3e..24524c0f3ef8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,9 +28,6 @@ #include "gaccess.h" #include "trace-s390.h" -#define IOINT_SCHID_MASK 0x0000ffff -#define IOINT_SSID_MASK 0x00030000 -#define IOINT_CSSID_MASK 0x03fc0000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)"); + else + VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); + vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -991,6 +995,11 @@ void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu) swake_up(&vcpu->wq); vcpu->stat.halt_wakeup++; } + /* + * The VCPU might not be sleeping but is executing the VSIE. Let's + * kick it, so it leaves the SIE to process the request. + */ + kvm_s390_vsie_kick(vcpu); } enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer) @@ -1415,6 +1424,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } fi->counters[FIRQ_CNTR_IO] += 1; + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); @@ -1531,13 +1547,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & KVM_S390_INT_IO_AI_MASK) - VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); - else - VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", - s390int->type & IOINT_CSSID_MASK, - s390int->type & IOINT_SSID_MASK, - s390int->type & IOINT_SCHID_MASK); inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; @@ -2237,7 +2246,8 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e, return ret; } -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { int ret; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6f5c344cd785..3f3ae4865d57 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -21,11 +21,13 @@ #include <linux/init.h> #include <linux/kvm.h> #include <linux/kvm_host.h> +#include <linux/mman.h> #include <linux/module.h> #include <linux/random.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/vmalloc.h> +#include <linux/bitmap.h> #include <asm/asm-offsets.h> #include <asm/lowcore.h> #include <asm/stp.h> @@ -35,6 +37,8 @@ #include <asm/switch_to.h> #include <asm/isc.h> #include <asm/sclp.h> +#include <asm/cpacf.h> +#include <asm/timex.h> #include "kvm-s390.h" #include "gaccess.h" @@ -64,6 +68,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_pei", VCPU_STAT(exit_pei) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, @@ -94,6 +99,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, + { "instruction_sie", VCPU_STAT(instruction_sie) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, @@ -119,6 +126,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +/* allow nested virtualization in KVM (if enabled by user space) */ +static int nested; +module_param(nested, int, S_IRUGO); +MODULE_PARM_DESC(nested, "Nested virtualization support"); + /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[16] = { 0xffe6000000000000UL, @@ -131,7 +143,13 @@ unsigned long kvm_s390_fac_list_mask_size(void) return ARRAY_SIZE(kvm_s390_fac_list_mask); } +/* available cpu features supported by kvm */ +static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); +/* available subfunctions indicated via query / "test bit" */ +static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; + static struct gmap_notifier gmap_notifier; +static struct gmap_notifier vsie_gmap_notifier; debug_info_t *kvm_s390_dbf; /* Section: not file related */ @@ -141,7 +159,8 @@ int kvm_arch_hardware_enable(void) return 0; } -static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address); +static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start, + unsigned long end); /* * This callback is executed during stop_machine(). All CPUs are therefore @@ -163,6 +182,8 @@ static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val, vcpu->arch.sie_block->epoch -= *delta; if (vcpu->arch.cputm_enabled) vcpu->arch.cputm_start += *delta; + if (vcpu->arch.vsie_block) + vcpu->arch.vsie_block->epoch -= *delta; } } return NOTIFY_OK; @@ -175,7 +196,9 @@ static struct notifier_block kvm_clock_notifier = { int kvm_arch_hardware_setup(void) { gmap_notifier.notifier_call = kvm_gmap_notifier; - gmap_register_ipte_notifier(&gmap_notifier); + gmap_register_pte_notifier(&gmap_notifier); + vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier; + gmap_register_pte_notifier(&vsie_gmap_notifier); atomic_notifier_chain_register(&s390_epoch_delta_notifier, &kvm_clock_notifier); return 0; @@ -183,11 +206,109 @@ int kvm_arch_hardware_setup(void) void kvm_arch_hardware_unsetup(void) { - gmap_unregister_ipte_notifier(&gmap_notifier); + gmap_unregister_pte_notifier(&gmap_notifier); + gmap_unregister_pte_notifier(&vsie_gmap_notifier); atomic_notifier_chain_unregister(&s390_epoch_delta_notifier, &kvm_clock_notifier); } +static void allow_cpu_feat(unsigned long nr) +{ + set_bit_inv(nr, kvm_s390_available_cpu_feat); +} + +static inline int plo_test_bit(unsigned char nr) +{ + register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + int cc = 3; /* subfunction not available */ + + asm volatile( + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (r0) + : "cc"); + return cc == 0; +} + +static void kvm_s390_cpu_feat_init(void) +{ + int i; + + for (i = 0; i < 256; ++i) { + if (plo_test_bit(i)) + kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); + } + + if (test_facility(28)) /* TOD-clock steering */ + ptff(kvm_s390_available_subfunc.ptff, + sizeof(kvm_s390_available_subfunc.ptff), + PTFF_QAF); + + if (test_facility(17)) { /* MSA */ + __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + } + if (test_facility(76)) /* MSA3 */ + __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + if (test_facility(77)) { /* MSA4 */ + __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + } + if (test_facility(57)) /* MSA5 */ + __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno); + + if (MACHINE_HAS_ESOP) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); + /* + * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), + * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing). + */ + if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao || + !test_facility(3) || !nested) + return; + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); + if (sclp.has_64bscao) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); + if (sclp.has_siif) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); + if (sclp.has_gpere) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); + if (sclp.has_gsls) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); + if (sclp.has_ib) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); + if (sclp.has_cei) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); + if (sclp.has_ibs) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); + /* + * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make + * all skey handling functions read/set the skey from the PGSTE + * instead of the real storage key. + * + * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make + * pages being detected as preserved although they are resident. + * + * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will + * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. + * + * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and + * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be + * correctly shadowed. We can do that for the PGSTE but not for PTE.I. + * + * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We |