summaryrefslogtreecommitdiffstats
path: root/arch/s390/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-06-04 08:47:12 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-06-04 08:47:12 -0700
commitb05d59dfceaea72565b1648af929b037b0f96d7f (patch)
treebbe92714be468ed8783bce6ac2c305c0aedf8eb5 /arch/s390/kvm
parentdaf342af2f7856fd2f5c66b9fb39a8f24986ca53 (diff)
parent820b3fcdeb80d30410f4427d2cbf9161c35fdeef (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm into next
Pull KVM updates from Paolo Bonzini: "At over 200 commits, covering almost all supported architectures, this was a pretty active cycle for KVM. Changes include: - a lot of s390 changes: optimizations, support for migration, GDB support and more - ARM changes are pretty small: support for the PSCI 0.2 hypercall interface on both the guest and the host (the latter acked by Catalin) - initial POWER8 and little-endian host support - support for running u-boot on embedded POWER targets - pretty large changes to MIPS too, completing the userspace interface and improving the handling of virtualized timer hardware - for x86, a larger set of changes is scheduled for 3.17. Still, we have a few emulator bugfixes and support for running nested fully-virtualized Xen guests (para-virtualized Xen guests have always worked). And some optimizations too. The only missing architecture here is ia64. It's not a coincidence that support for KVM on ia64 is scheduled for removal in 3.17" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (203 commits) KVM: add missing cleanup_srcu_struct KVM: PPC: Book3S PR: Rework SLB switching code KVM: PPC: Book3S PR: Use SLB entry 0 KVM: PPC: Book3S HV: Fix machine check delivery to guest KVM: PPC: Book3S HV: Work around POWER8 performance monitor bugs KVM: PPC: Book3S HV: Make sure we don't miss dirty pages KVM: PPC: Book3S HV: Fix dirty map for hugepages KVM: PPC: Book3S HV: Put huge-page HPTEs in rmap chain for base address KVM: PPC: Book3S HV: Fix check for running inside guest in global_invalidates() KVM: PPC: Book3S: Move KVM_REG_PPC_WORT to an unused register number KVM: PPC: Book3S: Add ONE_REG register names that were missed KVM: PPC: Add CAP to indicate hcall fixes KVM: PPC: MPIC: Reset IRQ source private members KVM: PPC: Graciously fail broken LE hypercalls PPC: ePAPR: Fix hypercall on LE guest KVM: PPC: BOOK3S: Remove open coded make_dsisr in alignment handler KVM: PPC: BOOK3S: Always use the saved DAR value PPC: KVM: Make NX bit available with magic page KVM: PPC: Disable NX for old magic page using guests KVM: PPC: BOOK3S: HV: Add mixed page-size support for guest ...
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r--arch/s390/kvm/Makefile4
-rw-r--r--arch/s390/kvm/diag.c19
-rw-r--r--arch/s390/kvm/gaccess.c726
-rw-r--r--arch/s390/kvm/gaccess.h379
-rw-r--r--arch/s390/kvm/guestdbg.c482
-rw-r--r--arch/s390/kvm/intercept.c222
-rw-r--r--arch/s390/kvm/interrupt.c400
-rw-r--r--arch/s390/kvm/kvm-s390.c554
-rw-r--r--arch/s390/kvm/kvm-s390.h73
-rw-r--r--arch/s390/kvm/priv.c357
-rw-r--r--arch/s390/kvm/sigp.c103
-rw-r--r--arch/s390/kvm/trace-s390.h43
-rw-r--r--arch/s390/kvm/trace.h99
13 files changed, 2963 insertions, 498 deletions
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d3adb37e93a4..b3b553469650 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -11,5 +11,7 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
-kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o diag.o
+kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
+kvm-objs += diag.o gaccess.o guestdbg.o
+
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 08dfc839a6cf..0161675878a2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -23,7 +23,7 @@
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
- unsigned long prefix = vcpu->arch.sie_block->prefix;
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
@@ -64,12 +64,12 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
int rc;
u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
- unsigned long hva_token = KVM_HVA_ERR_BAD;
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
- return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -89,8 +89,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
- if (kvm_is_error_hva(hva_token))
+ if (kvm_is_error_gpa(vcpu->kvm, parm.token_addr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
vcpu->arch.pfault_token = parm.token_addr;
@@ -167,23 +166,17 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode);
switch (subcode) {
- case 0:
- case 1:
- page_table_reset_pgste(current->mm, 0, TASK_SIZE);
- return -EOPNOTSUPP;
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
- page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
case 4:
vcpu->run->s390_reset_flags = 0;
- page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
default:
return -EOPNOTSUPP;
}
- atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
new file mode 100644
index 000000000000..4653ac6e182b
--- /dev/null
+++ b/arch/s390/kvm/gaccess.c
@@ -0,0 +1,726 @@
+/*
+ * guest access functions
+ *
+ * Copyright IBM Corp. 2014
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <asm/pgtable.h>
+#include "kvm-s390.h"
+#include "gaccess.h"
+
+union asce {
+ unsigned long val;
+ struct {
+ unsigned long origin : 52; /* Region- or Segment-Table Origin */
+ unsigned long : 2;
+ unsigned long g : 1; /* Subspace Group Control */
+ unsigned long p : 1; /* Private Space Control */
+ unsigned long s : 1; /* Storage-Alteration-Event Control */
+ unsigned long x : 1; /* Space-Switch-Event Control */
+ unsigned long r : 1; /* Real-Space Control */
+ unsigned long : 1;
+ unsigned long dt : 2; /* Designation-Type Control */
+ unsigned long tl : 2; /* Region- or Segment-Table Length */
+ };
+};
+
+enum {
+ ASCE_TYPE_SEGMENT = 0,
+ ASCE_TYPE_REGION3 = 1,
+ ASCE_TYPE_REGION2 = 2,
+ ASCE_TYPE_REGION1 = 3
+};
+
+union region1_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Second-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Second-Table Length */
+ };
+};
+
+union region2_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long rto: 52;/* Region-Table Origin */
+ unsigned long : 2;
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Region-Third-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long : 1;
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Region-Third-Table Length */
+ };
+};
+
+struct region3_table_entry_fc0 {
+ unsigned long sto: 52;/* Segment-Table Origin */
+ unsigned long : 1;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 1;
+ unsigned long tf : 2; /* Segment-Table Offset */
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long tl : 2; /* Segment-Table Length */
+};
+
+struct region3_table_entry_fc1 {
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long : 14;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union region3_table_entry {
+ unsigned long val;
+ struct region3_table_entry_fc0 fc0;
+ struct region3_table_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Region-Invalid Bit */
+ unsigned long cr : 1; /* Common-Region Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+struct segment_entry_fc0 {
+ unsigned long pto: 53;/* Page-Table Origin */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long : 3;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+struct segment_entry_fc1 {
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long : 3;
+ unsigned long av : 1; /* ACCF-Validity Control */
+ unsigned long acc: 4; /* Access-Control Bits */
+ unsigned long f : 1; /* Fetch-Protection Bit */
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 2;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+};
+
+union segment_table_entry {
+ unsigned long val;
+ struct segment_entry_fc0 fc0;
+ struct segment_entry_fc1 fc1;
+ struct {
+ unsigned long : 53;
+ unsigned long fc : 1; /* Format-Control */
+ unsigned long : 4;
+ unsigned long i : 1; /* Segment-Invalid Bit */
+ unsigned long cs : 1; /* Common-Segment Bit */
+ unsigned long tt : 2; /* Table-Type Bits */
+ unsigned long : 2;
+ };
+};
+
+enum {
+ TABLE_TYPE_SEGMENT = 0,
+ TABLE_TYPE_REGION3 = 1,
+ TABLE_TYPE_REGION2 = 2,
+ TABLE_TYPE_REGION1 = 3
+};
+
+union page_table_entry {
+ unsigned long val;
+ struct {
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+ unsigned long z : 1; /* Zero Bit */
+ unsigned long i : 1; /* Page-Invalid Bit */
+ unsigned long p : 1; /* DAT-Protection Bit */
+ unsigned long co : 1; /* Change-Recording Override */
+ unsigned long : 8;
+ };
+};
+
+/*
+ * vaddress union in order to easily decode a virtual address into its
+ * region first index, region second index etc. parts.
+ */
+union vaddress {
+ unsigned long addr;
+ struct {
+ unsigned long rfx : 11;
+ unsigned long rsx : 11;
+ unsigned long rtx : 11;
+ unsigned long sx : 11;
+ unsigned long px : 8;
+ unsigned long bx : 12;
+ };
+ struct {
+ unsigned long rfx01 : 2;
+ unsigned long : 9;
+ unsigned long rsx01 : 2;
+ unsigned long : 9;
+ unsigned long rtx01 : 2;
+ unsigned long : 9;
+ unsigned long sx01 : 2;
+ unsigned long : 29;
+ };
+};
+
+/*
+ * raddress union which will contain the result (real or absolute address)
+ * after a page table walk. The rfaa, sfaa and pfra members are used to
+ * simply assign them the value of a region, segment or page table entry.
+ */
+union raddress {
+ unsigned long addr;
+ unsigned long rfaa : 33; /* Region-Frame Absolute Address */
+ unsigned long sfaa : 44; /* Segment-Frame Absolute Address */
+ unsigned long pfra : 52; /* Page-Frame Real Address */
+};
+
+static int ipte_lock_count;
+static DEFINE_MUTEX(ipte_mutex);
+
+int ipte_lock_held(struct kvm_vcpu *vcpu)
+{
+ union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;
+
+ if (vcpu->arch.sie_block->eca & 1)
+ return ic->kh != 0;
+ return ipte_lock_count != 0;
+}
+
+static void ipte_lock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count++;
+ if (ipte_lock_count > 1)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old = ACCESS_ONCE(*ic);
+ while (old.k) {
+ cond_resched();
+ old = ACCESS_ONCE(*ic);
+ }
+ new = old;
+ new.k = 1;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ mutex_lock(&ipte_mutex);
+ ipte_lock_count--;
+ if (ipte_lock_count)
+ goto out;
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ new = old = ACCESS_ONCE(*ic);
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ if (!ipte_lock_count)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+out:
+ mutex_unlock(&ipte_mutex);
+}
+
+static void ipte_lock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ old = ACCESS_ONCE(*ic);
+ while (old.kg) {
+ cond_resched();
+ old = ACCESS_ONCE(*ic);
+ }
+ new = old;
+ new.k = 1;
+ new.kh++;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+}
+
+static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
+{
+ union ipte_control old, new, *ic;
+
+ ic = &vcpu->kvm->arch.sca->ipte_control;
+ do {
+ new = old = ACCESS_ONCE(*ic);
+ new.kh--;
+ if (!new.kh)
+ new.k = 0;
+ } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ if (!new.kh)
+ wake_up(&vcpu->kvm->arch.ipte_wq);
+}
+
+void ipte_lock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & 1)
+ ipte_lock_siif(vcpu);
+ else
+ ipte_lock_simple(vcpu);
+}
+
+void ipte_unlock(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->arch.sie_block->eca & 1)
+ ipte_unlock_siif(vcpu);
+ else
+ ipte_unlock_simple(vcpu);
+}
+
+static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+{
+ switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
+ case PSW_AS_PRIMARY:
+ return vcpu->arch.sie_block->gcr[1];
+ case PSW_AS_SECONDARY:
+ return vcpu->arch.sie_block->gcr[7];
+ case PSW_AS_HOME:
+ return vcpu->arch.sie_block->gcr[13];
+ }
+ return 0;
+}
+
+static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
+{
+ return kvm_read_guest(kvm, gpa, val, sizeof(*val));
+}
+
+/**
+ * guest_translate - translate a guest virtual into a guest absolute address
+ * @vcpu: virtual cpu
+ * @gva: guest virtual address
+ * @gpa: points to where guest physical (absolute) address should be stored
+ * @write: indicates if access is a write access
+ *
+ * Translate a guest virtual address into a guest absolute address by means
+ * of dynamic address translation as specified by the architecuture.
+ * If the resulting absolute address is not available in the configuration
+ * an addressing exception is indicated and @gpa will not be changed.
+ *
+ * Returns: - zero on success; @gpa contains the resulting absolute address
+ * - a negative value if guest access failed due to e.g. broken
+ * guest mapping
+ * - a positve value if an access exception happened. In this case
+ * the returned value is the program interruption code as defined
+ * by the architecture
+ */
+static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ union vaddress vaddr = {.addr = gva};
+ union raddress raddr = {.addr = gva};
+ union page_table_entry pte;
+ int dat_protection = 0;
+ union ctlreg0 ctlreg0;
+ unsigned long ptr;
+ int edat1, edat2;
+ union asce asce;
+
+ ctlreg0.val = vcpu->arch.sie_block->gcr[0];
+ edat1 = ctlreg0.edat && test_vfacility(8);
+ edat2 = edat1 && test_vfacility(78);
+ asce.val = get_vcpu_asce(vcpu);
+ if (asce.r)
+ goto real_address;
+ ptr = asce.origin * 4096;
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1:
+ if (vaddr.rfx01 > asce.tl)
+ return PGM_REGION_FIRST_TRANS;
+ ptr += vaddr.rfx * 8;
+ break;
+ case ASCE_TYPE_REGION2:
+ if (vaddr.rfx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rsx01 > asce.tl)
+ return PGM_REGION_SECOND_TRANS;
+ ptr += vaddr.rsx * 8;
+ break;
+ case ASCE_TYPE_REGION3:
+ if (vaddr.rfx || vaddr.rsx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.rtx01 > asce.tl)
+ return PGM_REGION_THIRD_TRANS;
+ ptr += vaddr.rtx * 8;
+ break;
+ case ASCE_TYPE_SEGMENT:
+ if (vaddr.rfx || vaddr.rsx || vaddr.rtx)
+ return PGM_ASCE_TYPE;
+ if (vaddr.sx01 > asce.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ ptr += vaddr.sx * 8;
+ break;
+ }
+ switch (asce.dt) {
+ case ASCE_TYPE_REGION1: {
+ union region1_table_entry rfte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rfte.val))
+ return -EFAULT;
+ if (rfte.i)
+ return PGM_REGION_FIRST_TRANS;
+ if (rfte.tt != TABLE_TYPE_REGION1)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rsx01 < rfte.tf || vaddr.rsx01 > rfte.tl)
+ return PGM_REGION_SECOND_TRANS;
+ if (edat1)
+ dat_protection |= rfte.p;
+ ptr = rfte.rto * 4096 + vaddr.rsx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION2: {
+ union region2_table_entry rste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rste.val))
+ return -EFAULT;
+ if (rste.i)
+ return PGM_REGION_SECOND_TRANS;
+ if (rste.tt != TABLE_TYPE_REGION2)
+ return PGM_TRANSLATION_SPEC;
+ if (vaddr.rtx01 < rste.tf || vaddr.rtx01 > rste.tl)
+ return PGM_REGION_THIRD_TRANS;
+ if (edat1)
+ dat_protection |= rste.p;
+ ptr = rste.rto * 4096 + vaddr.rtx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_REGION3: {
+ union region3_table_entry rtte;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &rtte.val))
+ return -EFAULT;
+ if (rtte.i)
+ return PGM_REGION_THIRD_TRANS;
+ if (rtte.tt != TABLE_TYPE_REGION3)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.cr && asce.p && edat2)
+ return PGM_TRANSLATION_SPEC;
+ if (rtte.fc && edat2) {
+ dat_protection |= rtte.fc1.p;
+ raddr.rfaa = rtte.fc1.rfaa;
+ goto absolute_address;
+ }
+ if (vaddr.sx01 < rtte.fc0.tf)
+ return PGM_SEGMENT_TRANSLATION;
+ if (vaddr.sx01 > rtte.fc0.tl)
+ return PGM_SEGMENT_TRANSLATION;
+ if (edat1)
+ dat_protection |= rtte.fc0.p;
+ ptr = rtte.fc0.sto * 4096 + vaddr.sx * 8;
+ }
+ /* fallthrough */
+ case ASCE_TYPE_SEGMENT: {
+ union segment_table_entry ste;
+
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &ste.val))
+ return -EFAULT;
+ if (ste.i)
+ return PGM_SEGMENT_TRANSLATION;
+ if (ste.tt != TABLE_TYPE_SEGMENT)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.cs && asce.p)
+ return PGM_TRANSLATION_SPEC;
+ if (ste.fc && edat1) {
+ dat_protection |= ste.fc1.p;
+ raddr.sfaa = ste.fc1.sfaa;
+ goto absolute_address;
+ }
+ dat_protection |= ste.fc0.p;
+ ptr = ste.fc0.pto * 2048 + vaddr.px * 8;
+ }
+ }
+ if (kvm_is_error_gpa(vcpu->kvm, ptr))
+ return PGM_ADDRESSING;
+ if (deref_table(vcpu->kvm, ptr, &pte.val))
+ return -EFAULT;
+ if (pte.i)
+ return PGM_PAGE_TRANSLATION;
+ if (pte.z)
+ return PGM_TRANSLATION_SPEC;
+ if (pte.co && !edat1)
+ return PGM_TRANSLATION_SPEC;
+ dat_protection |= pte.p;
+ raddr.pfra = pte.pfra;
+real_address:
+ raddr.addr = kvm_s390_real_to_abs(vcpu, raddr.addr);
+absolute_address:
+ if (write && dat_protection)
+ return PGM_PROTECTION;
+ if (kvm_is_error_gpa(vcpu->kvm, raddr.addr))
+ return PGM_ADDRESSING;
+ *gpa = raddr.addr;
+ return 0;
+}
+
+static inline int is_low_address(unsigned long ga)
+{
+ /* Check for address ranges 0..511 and 4096..4607 */
+ return (ga & ~0x11fful) == 0;
+}
+
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+{
+ union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ union asce asce;
+
+ if (!ctlreg0.lap)
+ return 0;
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && asce.p)
+ return 0;
+ return 1;
+}
+
+struct trans_exc_code_bits {
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 7;
+ unsigned long b61 : 1;
+ unsigned long as : 2; /* ASCE Identifier */
+};
+
+enum {
+ FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+ FSI_STORE = 1, /* Exception was due to store operation */
+ FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+ unsigned long *pages, unsigned long nr_pages,
+ int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec_bits;
+ int lap_enabled, rc;
+
+ memset(pgm, 0, sizeof(*pgm));
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw_bits(*psw).as;
+ lap_enabled = low_address_protection_enabled(vcpu);
+ while (nr_pages) {
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ tec_bits->addr = ga >> PAGE_SHIFT;
+ if (write && lap_enabled && is_low_address(ga)) {
+ pgm->code = PGM_PROTECTION;
+ return pgm->code;
+ }
+ ga &= PAGE_MASK;
+ if (psw_bits(*psw).t) {
+ rc = guest_translate(vcpu, ga, pages, write);
+ if (rc < 0)
+ return rc;
+ if (rc == PGM_PROTECTION)
+ tec_bits->b61 = 1;
+ if (rc)
+ pgm->code = rc;
+ } else {
+ *pages = kvm_s390_real_to_abs(vcpu, ga);
+ if (kvm_is_error_gpa(vcpu->kvm, *pages))
+ pgm->code = PGM_ADDRESSING;
+ }
+ if (pgm->code)
+ return pgm->code;
+ ga += PAGE_SIZE;
+ pages++;
+ nr_pages--;
+ }
+ return 0;
+}
+
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+ unsigned long len, int write)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ unsigned long _len, nr_pages, gpa, idx;
+ unsigned long pages_array[2];
+ unsigned long *pages;
+ int need_ipte_lock;
+ union asce asce;
+ int rc;
+
+ if (!len)
+ return 0;
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+ nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
+ pages = pages_array;
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ pages = vmalloc(nr_pages * sizeof(unsigned long));
+ if (!pages)
+ return -ENOMEM;
+ asce.val = get_vcpu_asce(vcpu);
+ need_ipte_lock = psw_bits(*psw).t && !asce.r;
+ if (need_ipte_lock)
+ ipte_lock(vcpu);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+ for (idx = 0; idx < nr_pages && !rc; idx++) {
+ gpa = *(pages + idx) + (ga & ~PAGE_MASK);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (write)
+ rc = kvm_write_guest(vcpu->kvm, gpa, data, _len);
+ else
+ rc = kvm_read_guest(vcpu->kvm, gpa, data, _len);
+ len -= _len;
+ ga += _len;
+ data += _len;
+ }
+ if (need_ipte_lock)
+ ipte_unlock(vcpu);
+ if (nr_pages > ARRAY_SIZE(pages_array))
+ vfree(pages);
+ return rc;
+}
+
+int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
+ void *data, unsigned long len, int write)
+{
+ unsigned long _len, gpa;
+ int rc = 0;
+
+ while (len && !rc) {
+ gpa = kvm_s390_real_to_abs(vcpu, gra);
+ _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
+ if (write)
+ rc = write_guest_abs(vcpu, gpa, data, _len);
+ else
+ rc = read_guest_abs(vcpu, gpa, data, _len);
+ len -= _len;
+ gra += _len;
+ data += _len;
+ }
+ return rc;
+}
+
+/**
+ * guest_translate_address - translate guest logical into guest absolute address
+ *
+ * Parameter semantics are the same as the ones from guest_translate.
+ * The memory contents at the guest address are not changed.
+ *
+ * Note: The IPTE lock is not taken during this function, so the caller
+ * has to take care of this.
+ */
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa, int write)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec;
+ union asce asce;
+ int rc;
+
+ /* Access register mode is not supported yet. */
+ if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
+ return -EOPNOTSUPP;
+
+ gva = kvm_s390_logical_to_effective(vcpu, gva);
+ memset(pgm, 0, sizeof(*pgm));
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec->as = psw_bits(*psw).as;
+ tec->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec->addr = gva >> PAGE_SHIFT;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+ if (write) {
+ rc = pgm->code = PGM_PROTECTION;
+ return rc;
+ }
+ }
+
+ asce.val = get_vcpu_asce(vcpu);
+ if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
+ rc = guest_translate(vcpu, gva, gpa, write);
+ if (rc > 0) {
+ if (rc == PGM_PROTECTION)
+ tec->b61 = 1;
+ pgm->code = rc;
+ }
+ } else {
+ rc = 0;
+ *gpa = kvm_s390_real_to_abs(vcpu, gva);
+ if (kvm_is_error_gpa(vcpu->kvm, *gpa))
+ rc = pgm->code = PGM_ADDRESSING;
+ }
+
+ return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_protection - check for low-address protection
+ * @ga: Guest address
+ *
+ * Checks whether an address is subject to low-address protection and set
+ * up vcpu->arch.pgm accordingly if necessary.
+ *
+ * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
+ */
+int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+{
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct trans_exc_code_bits *tec_bits;
+
+ if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+ return 0;
+
+ memset(pgm, 0, sizeof(*pgm));
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = FSI_STORE;
+ tec_bits->as = psw_bits(*psw).as;
+ tec_bits->addr = ga >> PAGE_SHIFT;
+ pgm->code = PGM_PROTECTION;
+
+ return pgm->code;
+}
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 374a439ccc60..0149cf15058a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -1,7 +1,7 @@
/*
* access guest memory
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2014
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -15,100 +15,321 @@
#include <linux/compiler.h>
#include <linux/kvm_host.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
#include "kvm-s390.h"
-/* Convert real to absolute address by applying the prefix of the CPU */
+/**
+ * kvm_s390_real_to_abs - convert guest real address to guest absolute address
+ * @vcpu - guest virtual cpu
+ * @gra - guest real address
+ *
+ * Returns the guest absolute address that corresponds to the passed guest real
+ * address @gra of a virtual guest cpu by applying its prefix.
+ */
static inline unsigned long kvm_s390_real_to_abs(struct kvm_vcpu *vcpu,
- unsigned long gaddr)
+ unsigned long gra)
{
- unsigned long prefix = vcpu->arch.sie_block->prefix;
- if (gaddr < 2 * PAGE_SIZE)
- gaddr += prefix;
- else if (gaddr >= prefix && gaddr < prefix + 2 * PAGE_SIZE)
- gaddr -= prefix;
- return gaddr;
+ unsigned long prefix = kvm_s390_get_prefix(vcpu);
+
+ if (gra < 2 * PAGE_SIZE)
+ gra += prefix;
+ else if (gra >= prefix && gra < prefix + 2 * PAGE_SIZE)
+ gra -= prefix;
+ return gra;
}
-static inline void __user *__gptr_to_uptr(struct kvm_vcpu *vcpu,
- void __user *gptr,
- int prefixing)
+/**
+ * kvm_s390_logical_to_effective - convert guest logical to effective address
+ * @vcpu: guest virtual cpu
+ * @ga: guest logical address
+ *
+ * Convert a guest vcpu logical address to a guest vcpu effective address by
+ * applying the rules of the vcpu's addressing mode defined by PSW bits 31
+ * and 32 (extendended/basic addressing mode).
+ *
+ * Depending on the vcpu's addressing mode the upper 40 bits (24 bit addressing
+ * mode), 33 bits (31 bit addressing mode) or no bits (64 bit addressing mode)
+ * of @ga will be zeroed and the remaining bits will be returned.
+ */
+static inline unsigned long kvm_s390_logical_to_effective(struct kvm_vcpu *vcpu,
+ unsigned long ga)
{
- unsigned long gaddr = (unsigned long) gptr;
- unsigned long uaddr;
-
- if (prefixing)
- gaddr = kvm_s390_real_to_abs(vcpu, gaddr);
- uaddr = gmap_fault(gaddr, vcpu->arch.gmap);
- if (IS_ERR_VALUE(uaddr))
- uaddr = -EFAULT;
- return (void __user *)uaddr;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+
+ if (psw_bits(*psw).eaba == PSW_AMODE_64BIT)
+ return ga;
+ if (psw_bits(*psw).eaba == PSW_AMODE_31BIT)
+ return ga & ((1UL << 31) - 1);
+ return ga & ((1UL << 24) - 1);
}
-#define get_guest(vcpu, x, gptr) \
-({ \
- __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
- int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret; \
- \
- if (IS_ERR((void __force *)__uptr)) { \
- __ret = PTR_ERR((void __force *)__uptr); \
- } else { \
- BUG_ON((unsigned long)__uptr & __mask); \
- __ret = get_user(x, __uptr); \
- } \
- __ret; \
-})
+/*
+ * put_guest_lc, read_guest_lc and write_guest_lc are guest access functions
+ * which shall only be used to access the lowcore of a vcpu.
+ * These functions should be used for e.g. interrupt handlers where no
+ * guest memory access protection facilities, like key or low address
+ * protection, are applicable.
+ * At a later point guest vcpu lowcore access should happen via pinned
+ * prefix pages, so that these pages can be accessed directly via the
+ * kernel mapping. All of these *_lc functions can be removed then.
+ */
-#define put_guest(vcpu, x, gptr) \
+/**
+ * put_guest_lc - write a simple variable to a guest vcpu's lowcore
+ * @vcpu: virtual cpu
+ * @x: value to copy to guest
+ * @gra: vcpu's destination guest real address
+ *
+ * Copies a simple value from kernel space to a guest vcpu's lowcore.
+ * The size of the variable may be 1, 2, 4 or 8 bytes. The destination
+ * must be located in the vcpu's lowcore. Otherwise the result is undefined.
+ *
+ * Returns zero on success or -EFAULT on error.
+ *
+ * Note: an error indicates that either the kernel is out of memory or
+ * the guest memory mapping is broken. In any case the best solution
+ * would be to terminate the guest.
+ * It is wrong to inject a guest exception.
+ */
+#define put_guest_lc(vcpu, x, gra) \
({ \
- __typeof__(gptr) __uptr = __gptr_to_uptr(vcpu, gptr, 1);\
- int __mask = sizeof(__typeof__(*(gptr))) - 1; \
- int __ret; \
+ struct kvm_vcpu *__vcpu = (vcpu); \
+ __typeof__(*(gra)) __x = (x); \
+ unsigned long __gpa; \
\
- if (IS_ERR((void __force *)__uptr)) { \
- __ret = PTR_ERR((void __force *)__uptr); \
- } else { \
- BUG_ON((unsigned long)__uptr & __mask); \
- __ret = put_user(x, __uptr); \
- } \
- __ret; \
+ __gpa = (unsigned long)(gra); \
+ __gpa += kvm_s390_get_prefix(__vcpu); \
+ kvm_write_guest(__vcpu->kvm, __gpa, &__x, sizeof(__x)); \
})
-static inline int __copy_guest(struct kvm_vcpu *vcpu, unsigned long to,
- unsigned long from, unsigned long len,
- int to_guest, int prefixing)
+/**
+ * write_guest_lc - copy data from kernel space to guest vc