From 7fe864dc942c041cc4f56e287c4025d54a8e6c1e Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:03 +0300 Subject: KVM: x86: Mark VEX-prefix instructions emulation as unimplemented Currently the emulator does not recognize vex-prefix instructions. However, it may incorrectly decode lgdt/lidt instructions and try to execute them. This patch returns unhandlable error on their emulation. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4e833d3d7d7..8ec4a3ebf403 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4314,6 +4314,13 @@ done_prefixes: if (ctxt->d & ModRM) ctxt->modrm = insn_fetch(u8, ctxt); + /* vex-prefix instructions are not implemented */ + if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) && + (mode == X86EMUL_MODE_PROT64 || + (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) { + ctxt->d = NotImpl; + } + while (ctxt->d & GroupMask) { switch (ctxt->d & GroupMask) { case Group: -- cgit v1.2.3 From e37a75a13cdae5deaa2ea2cbf8d55b5dd08638b6 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:04 +0300 Subject: KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR The current implementation ignores the LDTR/TR base high 32-bits on long-mode. As a result the loaded segment descriptor may be incorrect. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8ec4a3ebf403..136088fb038b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1422,6 +1422,7 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ulong desc_addr; int ret; u16 dummy; + u32 base3 = 0; memset(&seg_desc, 0, sizeof seg_desc); @@ -1538,9 +1539,14 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, ret = write_segment_descriptor(ctxt, selector, &seg_desc); if (ret != X86EMUL_CONTINUE) return ret; + } else if (ctxt->mode == X86EMUL_MODE_PROT64) { + ret = ctxt->ops->read_std(ctxt, desc_addr+8, &base3, + sizeof(base3), &ctxt->exception); + if (ret != X86EMUL_CONTINUE) + return ret; } load: - ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); + ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); -- cgit v1.2.3 From 2eedcac8a97cef43c9c5236398fc8c9d0fd9cc0c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:05 +0300 Subject: KVM: x86: Loading segments on 64-bit mode may be wrong The current emulator implementation ignores the high 32 bits of the base in long-mode. During segment load from the LDT, the base of the LDT is calculated incorrectly and may cause the wrong segment to be loaded. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 136088fb038b..7e4a45cab400 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1358,17 +1358,19 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, u16 selector, struct desc_ptr *dt) { const struct x86_emulate_ops *ops = ctxt->ops; + u32 base3 = 0; if (selector & 1 << 2) { struct desc_struct desc; u16 sel; memset (dt, 0, sizeof *dt); - if (!ops->get_segment(ctxt, &sel, &desc, NULL, VCPU_SREG_LDTR)) + if (!ops->get_segment(ctxt, &sel, &desc, &base3, + VCPU_SREG_LDTR)) return; dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ - dt->address = get_desc_base(&desc); + dt->address = get_desc_base(&desc) | ((u64)base3 << 32); } else ops->get_gdt(ctxt, dt); } -- cgit v1.2.3 From 606b1c3e87597c2d6c9f3eb833a7251262390295 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:06 +0300 Subject: KVM: x86: sgdt and sidt are not privilaged The SGDT and SIDT instructions are not privilaged, i.e. they can be executed with CPL>0. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7e4a45cab400..a16bf225cab0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3642,8 +3642,8 @@ static const struct opcode group6[] = { }; static const struct group_dual group7 = { { - II(Mov | DstMem | Priv, em_sgdt, sgdt), - II(Mov | DstMem | Priv, em_sidt, sidt), + II(Mov | DstMem, em_sgdt, sgdt), + II(Mov | DstMem, em_sidt, sidt), II(SrcMem | Priv, em_lgdt, lgdt), II(SrcMem | Priv, em_lidt, lidt), II(SrcNone | DstMem | Mov, em_smsw, smsw), N, -- cgit v1.2.3 From 37c564f2854bf75969d0ac26e03f5cf2bb7d639f Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:07 +0300 Subject: KVM: x86: cmpxchg emulation should compare in reverse order The current implementation of cmpxchg does not update the flags correctly, since the accumulator should be compared with the destination and not the other way around. The current implementation does not update the flags correctly. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a16bf225cab0..4cb0da660283 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2052,8 +2052,10 @@ static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt) static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) { /* Save real source value, then compare EAX against destination. */ + ctxt->dst.orig_val = ctxt->dst.val; + ctxt->dst.val = reg_read(ctxt, VCPU_REGS_RAX); ctxt->src.orig_val = ctxt->src.val; - ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); + ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); if (ctxt->eflags & EFLG_ZF) { @@ -2063,6 +2065,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) /* Failure: write the value we saw to EAX. */ ctxt->dst.type = OP_REG; ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); + ctxt->dst.val = ctxt->dst.orig_val; } return X86EMUL_CONTINUE; } -- cgit v1.2.3 From 3b32004a66e96e17d2a031c08d3304245c506dfc Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:08 +0300 Subject: KVM: x86: movnti minimum op size of 32-bit is not kept If the operand-size prefix (0x66) is used in 64-bit mode, the emulator would assume the destination operand is 64-bit, when it should be 32-bit. Reminder: movnti does not support 16-bit operands and its default operand size is 32-bit. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4cb0da660283..be3f7645e39d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4836,8 +4836,8 @@ twobyte_insn: break; case 0xc3: /* movnti */ ctxt->dst.bytes = ctxt->op_bytes; - ctxt->dst.val = (ctxt->op_bytes == 4) ? (u32) ctxt->src.val : - (u64) ctxt->src.val; + ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val : + (u32) ctxt->src.val; break; default: goto cannot_emulate; -- cgit v1.2.3 From 67f4d4288c353734d29c45f6725971c71af96791 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:09 +0300 Subject: KVM: x86: rdpmc emulation checks the counter incorrectly The rdpmc emulation checks that the counter (ECX) is not higher than 2, without taking into considerations bits 30:31 role (e.g., bit 30 marks whether the counter is fixed). The fix uses the pmu information for checking the validity of the pmu counter. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/pmu.c | 9 +++++++++ arch/x86/kvm/x86.c | 7 +++++++ 5 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index a04fe4eb237d..ffa2671a7f2f 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -194,6 +194,7 @@ struct x86_emulate_ops { int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); + int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); void (*halt)(struct x86_emulate_ctxt *ctxt); void (*wbinvd)(struct x86_emulate_ctxt *ctxt); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 49314155b66c..63e020be3da7 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1070,6 +1070,7 @@ void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu); bool kvm_pmu_msr(struct kvm_vcpu *vcpu, u32 msr); int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc); int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); void kvm_handle_pmu_event(struct kvm_vcpu *vcpu); void kvm_deliver_pmi(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index be3f7645e39d..3da8d82acb31 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3507,7 +3507,7 @@ static int check_rdpmc(struct x86_emulate_ctxt *ctxt) u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || - (rcx > 3)) + ctxt->ops->check_pmc(ctxt, rcx)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index cbecaa90399c..3dd6accb64ec 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -428,6 +428,15 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; } +int kvm_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool fixed = pmc & (1u << 30); + pmc &= ~(3u << 30); + return (!fixed && pmc >= pmu->nr_arch_gp_counters) || + (fixed && pmc >= pmu->nr_arch_fixed_counters); +} + int kvm_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data) { struct kvm_pmu *pmu = &vcpu->arch.pmu; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f32a02578c0d..451d6acea808 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4762,6 +4762,12 @@ static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, return kvm_set_msr(emul_to_vcpu(ctxt), &msr); } +static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, + u32 pmc) +{ + return kvm_pmu_check_pmc(emul_to_vcpu(ctxt), pmc); +} + static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata) { @@ -4838,6 +4844,7 @@ static const struct x86_emulate_ops emulate_ops = { .set_dr = emulator_set_dr, .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, + .check_pmc = emulator_check_pmc, .read_pmc = emulator_read_pmc, .halt = emulator_halt, .wbinvd = emulator_wbinvd, -- cgit v1.2.3 From aaa05f2437b9450f30b301db962ec4d45ec90fbb Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:10 +0300 Subject: KVM: x86: Return error on cmpxchg16b emulation cmpxchg16b is currently unimplemented in the emulator. The least we can do is return error upon the emulation of this instruction. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3da8d82acb31..a151f8d24a1d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1999,6 +1999,9 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) { u64 old = ctxt->dst.orig_val64; + if (ctxt->dst.bytes == 16) + return X86EMUL_UNHANDLEABLE; + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); @@ -4077,7 +4080,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, op->orig_val = op->val; break; case OpMem64: - ctxt->memop.bytes = 8; + ctxt->memop.bytes = (ctxt->op_bytes == 8) ? 16 : 8; goto mem_common; case OpAcc: op->type = OP_REG; -- cgit v1.2.3 From 32e94d0696c26c6ba4f3ff53e70f6e0e825979bc Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 2 Jun 2014 18:34:11 +0300 Subject: KVM: x86: smsw emulation is incorrect in 64-bit mode In 64-bit mode, when the destination is a register, the assignment is done according to the operand size. Otherwise (memory operand or no 64-bit mode), a 16-bit assignment is performed. Currently, 16-bit assignment is always done to the destination. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a151f8d24a1d..9b5d97db7631 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3235,7 +3235,8 @@ static int em_lidt(struct x86_emulate_ctxt *ctxt) static int em_smsw(struct x86_emulate_ctxt *ctxt) { - ctxt->dst.bytes = 2; + if (ctxt->dst.type == OP_MEM) + ctxt->dst.bytes = 2; ctxt->dst.val = ctxt->ops->get_cr(ctxt, 0); return X86EMUL_CONTINUE; } -- cgit v1.2.3 From a5457e7bcf9a76ec5c2de5d311d9b0d3b724edc6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jun 2014 17:29:34 +0200 Subject: KVM: emulate: POP SS triggers a MOV SS shadow too We did not do that when interruptibility was added to the emulator, because at the time pop to segment was not implemented. Now it is, add it. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9b5d97db7631..bc670675223d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1762,6 +1762,9 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + if (ctxt->modrm_reg == VCPU_SREG_SS) + ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; + rc = load_segment_descriptor(ctxt, (u16)selector, seg); return rc; } -- cgit v1.2.3 From 968889771749d8e730d794deed2bd2e363a98a54 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Apr 2014 14:54:19 +0200 Subject: KVM: emulate: simplify BitOp handling Memory is always the destination for BitOp instructions. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index bc670675223d..ea976e478dca 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4079,7 +4079,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, mem_common: *op = ctxt->memop; ctxt->memopp = op; - if ((ctxt->d & BitOp) && op == &ctxt->dst) + if (ctxt->d & BitOp) fetch_bit_operand(ctxt); op->orig_val = op->val; break; -- cgit v1.2.3 From bdc907222c5e4edd848da0c031deb55b59f1cf9a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 6 May 2014 14:03:29 +0200 Subject: KVM: emulate: fix harmless typo in MMX decoding It was using the wrong member of the union. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index ea976e478dca..8a1796b1eef2 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1093,7 +1093,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, if (ctxt->d & Mmx) { op->type = OP_MM; op->bytes = 8; - op->addr.xmm = ctxt->modrm_rm & 7; + op->addr.mm = ctxt->modrm_rm & 7; return rc; } fetch_register_operand(op); -- cgit v1.2.3 From bc39c4db7110f88f338cbbabe53d3e43c7400a59 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 14 Jun 2014 23:44:29 +0200 Subject: arch/x86/kvm/vmx.c: use PAGE_ALIGNED instead of IS_ALIGNED(PAGE_SIZE use mm.h definition Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Fabian Frederick Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 801332edefc3..4f84be0d2d1e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5918,7 +5918,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, * which replaces physical address width with 32 * */ - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failInvalid(vcpu); skip_emulated_instruction(vcpu); return 1; @@ -5936,7 +5936,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, vmx->nested.vmxon_ptr = vmptr; break; case EXIT_REASON_VMCLEAR: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -5951,7 +5951,7 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, } break; case EXIT_REASON_VMPTRLD: - if (!IS_ALIGNED(vmptr, PAGE_SIZE) || (vmptr >> maxphyaddr)) { + if (!PAGE_ALIGNED(vmptr) || (vmptr >> maxphyaddr)) { nested_vmx_failValid(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS); skip_emulated_instruction(vcpu); @@ -8113,14 +8113,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if ((vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_MSR_BITMAPS) && - !IS_ALIGNED(vmcs12->msr_bitmap, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->msr_bitmap)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) && - !IS_ALIGNED(vmcs12->apic_access_addr, PAGE_SIZE)) { + !PAGE_ALIGNED(vmcs12->apic_access_addr)) { /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; -- cgit v1.2.3 From 7dec5603b6b8dc4c3e1c65d318bd2a5a8c62a424 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:57 +0300 Subject: KVM: x86: bit-ops emulation ignores offset on 64-bit The current emulation of bit operations ignores the offset from the destination on 64-bit target memory operands. This patch fixes this behavior. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8a1796b1eef2..439a3286406a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1220,12 +1220,14 @@ static void fetch_bit_operand(struct x86_emulate_ctxt *ctxt) long sv = 0, mask; if (ctxt->dst.type == OP_MEM && ctxt->src.type == OP_REG) { - mask = ~(ctxt->dst.bytes * 8 - 1); + mask = ~((long)ctxt->dst.bytes * 8 - 1); if (ctxt->src.bytes == 2) sv = (s16)ctxt->src.val & (s16)mask; else if (ctxt->src.bytes == 4) sv = (s32)ctxt->src.val & (s32)mask; + else + sv = (s64)ctxt->src.val & (s64)mask; ctxt->dst.addr.mem.ea += (sv >> 3); } -- cgit v1.2.3 From ee212297cd425620867d4398d55d068c4203768c Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:58 +0300 Subject: KVM: x86: Wrong emulation on 'xadd X, X' The emulator does not emulate the xadd instruction correctly if the two operands are the same. In this (unlikely) situation the result should be the sum of X and X (2X) when it is currently X. The solution is to first perform writeback to the source, before writing to the destination. The only instruction which should be affected is xadd, as the other instructions that perform writeback to the source use the extended accumlator (e.g., RAX:RDX). Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 439a3286406a..0d5ecbd45882 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4736,17 +4736,17 @@ special_insn: goto done; writeback: - if (!(ctxt->d & NoWrite)) { - rc = writeback(ctxt, &ctxt->dst); - if (rc != X86EMUL_CONTINUE) - goto done; - } if (ctxt->d & SrcWrite) { BUG_ON(ctxt->src.type == OP_MEM || ctxt->src.type == OP_MEM_STR); rc = writeback(ctxt, &ctxt->src); if (rc != X86EMUL_CONTINUE) goto done; } + if (!(ctxt->d & NoWrite)) { + rc = writeback(ctxt, &ctxt->dst); + if (rc != X86EMUL_CONTINUE) + goto done; + } /* * restore dst type in case the decoding will be reused -- cgit v1.2.3 From 9e8919ae793f4edfaa29694a70f71a515ae9942a Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:12:59 +0300 Subject: KVM: x86: Inter-privilege level ret emulation is not implemeneted Return unhandlable error on inter-privilege level ret instruction. This is since the current emulation does not check the privilege level correctly when loading the CS, and does not pop RSP/SS as needed. Cc: stable@vger.kernel.org Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 0d5ecbd45882..27ce38693f09 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2033,6 +2033,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) { int rc; unsigned long cs; + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) @@ -2042,6 +2043,9 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) rc = emulate_pop(ctxt, &cs, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; + /* Outer-privilege level return is not implemented */ + if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl) + return X86EMUL_UNHANDLEABLE; rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } -- cgit v1.2.3 From 140bad89fd25db1aab60f80ed7874e9a9bdbae3b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:13:00 +0300 Subject: KVM: x86: emulation of dword cmov on long-mode should clear [63:32] Even if the condition of cmov is not satisfied, bits[63:32] should be cleared. This is clearly stated in Intel's CMOVcc documentation. The solution is to reassign the destination onto itself if the condition is unsatisfied. For that matter the original destination value needs to be read. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 27ce38693f09..6f09b2e555ef 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3923,7 +3923,7 @@ static const struct opcode twobyte_table[256] = { N, N, N, N, N, N, N, N, N, N, /* 0x40 - 0x4F */ - X16(D(DstReg | SrcMem | ModRM | Mov)), + X16(D(DstReg | SrcMem | ModRM)), /* 0x50 - 0x5F */ N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 0x60 - 0x6F */ @@ -4824,8 +4824,10 @@ twobyte_insn: ops->get_dr(ctxt, ctxt->modrm_reg, &ctxt->dst.val); break; case 0x40 ... 0x4f: /* cmov */ - ctxt->dst.val = ctxt->dst.orig_val = ctxt->src.val; - if (!test_cc(ctxt->b, ctxt->eflags)) + if (test_cc(ctxt->b, ctxt->eflags)) + ctxt->dst.val = ctxt->src.val; + else if (ctxt->mode != X86EMUL_MODE_PROT64 || + ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ -- cgit v1.2.3 From a825f5cc4a8455663562809748240169cb9bc2c0 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 15 Jun 2014 16:13:01 +0300 Subject: KVM: x86: NOP emulation clears (incorrectly) the high 32-bits of RAX On long-mode the current NOP (0x90) emulation still writes back to RAX. As a result, EAX is zero-extended and the high 32-bits of RAX are cleared. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6f09b2e555ef..84dc4ba0364d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4688,8 +4688,9 @@ special_insn: break; case 0x90 ... 0x97: /* nop / xchg reg, rax */ if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) - break; - rc = em_xchg(ctxt); + ctxt->dst.type = OP_NONE; + else + rc = em_xchg(ctxt); break; case 0x98: /* cbw/cwde/cdqe */ switch (ctxt->op_bytes) { -- cgit v1.2.3 From e4aa5288ff07766d101751de9a8420d666c61735 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 12 Jun 2014 19:40:32 +0200 Subject: KVM: x86: Fix constant value of VM_{EXIT_SAVE,ENTRY_LOAD}_DEBUG_CONTROLS The spec says those controls are at bit position 2 - makes 4 as value. The impact of this mistake is effectively zero as we only use them to ensure that these features are set at position 2 (or, previously, 1) in MSR_IA32_VMX_{EXIT,ENTRY}_CTLS - which is and will be always true according to the spec. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 7004d21e6219..d989829d3304 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -76,7 +76,7 @@ #define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 -#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000002 +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 #define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 @@ -89,7 +89,7 @@ #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff -#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000002 +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 #define VM_ENTRY_IA32E_MODE 0x00000200 #define VM_ENTRY_SMM 0x00000400 #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 -- cgit v1.2.3 From 3dbcd8da7b564194f93271b003a1c46ef404cbdb Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:40 +0200 Subject: KVM: nVMX: Advertise support for MSR_IA32_VMX_TRUE_*_CTLS We already implemented them but failed to advertise them. Currently they all return the identical values to the capability MSRs they are augmenting. So there is no change in exposed features yet. Drop related comments at this chance that are partially incorrect and redundant anyway. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/msr-index.h | 1 + arch/x86/kvm/vmx.c | 13 ++----------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index fcf2b3ae1bf0..eaefcc66c855 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -558,6 +558,7 @@ /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) #define VMX_BASIC_64 0x0001000000000000LLU #define VMX_BASIC_MEM_TYPE_SHIFT 50 #define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4f84be0d2d1e..31379faf952e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2265,21 +2265,13 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* pin-based controls */ rdmsr(MSR_IA32_VMX_PINBASED_CTLS, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high); - /* - * According to the Intel spec, if bit 55 of VMX_BASIC is off (as it is - * in our case), bits 1, 2 and 4 (i.e., 0x16) must be 1 in this MSR. - */ nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - /* - * Exit controls - * If bit 55 of VMX_BASIC is off, bits 0-8 and 10, 11, 13, 14, 16 and - * 17 must be 1. - */ + /* exit controls */ rdmsr(MSR_IA32_VMX_EXIT_CTLS, nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); nested_vmx_exit_ctls_low = VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR; @@ -2299,7 +2291,6 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); - /* If bit 55 of VMX_BASIC is off, bits 0-8 and 12 must be 1. */ nested_vmx_entry_ctls_low = VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_entry_ctls_high &= #ifdef CONFIG_X86_64 @@ -2394,7 +2385,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) * guest, and the VMCS structure we give it - not about the * VMX support of the underlying hardware. */ - *pdata = VMCS12_REVISION | + *pdata = VMCS12_REVISION | VMX_BASIC_TRUE_CTLS | ((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) | (VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT); break; -- cgit v1.2.3 From 3dcdf3ec6e48d918741ea11349d4436d0c5aac93 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:41 +0200 Subject: KVM: nVMX: Allow to disable CR3 access interception We already have this control enabled by exposing a broken MSR_IA32_VMX_PROCBASED_CTLS value. This will properly advertise our capability once the value is fixed by clearing the right bits in MSR_IA32_VMX_TRUE_PROCBASED_CTLS. We also have to ensure to test the right value on L2 entry. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 31379faf952e..e55e404b5dba 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2239,6 +2239,7 @@ static inline bool nested_vmx_allowed(struct kvm_vcpu *vcpu) * or other means. */ static u32 nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high; +static u32 nested_vmx_true_procbased_ctls_low; static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; @@ -2328,6 +2329,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) */ nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; + /* We support free control of CR3 access interception. */ + nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & + ~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING); + /* secondary cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high); @@ -2395,6 +2400,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_pinbased_ctls_high); break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high); + break; case MSR_IA32_VMX_PROCBASED_CTLS: *pdata = vmx_control_msr(nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); @@ -8127,7 +8135,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, - nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high) || + nested_vmx_true_procbased_ctls_low, + nested_vmx_procbased_ctls_high) || !vmx_control_verify(vmcs12->secondary_vm_exec_control, nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high) || !vmx_control_verify(vmcs12->pin_based_vm_exec_control, -- cgit v1.2.3 From 560b7ee12ca5e1ebc1675d7eb4008bb22708277a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:42 +0200 Subject: KVM: nVMX: Fix returned value of MSR_IA32_VMX_PROCBASED_CTLS SDM says bits 1, 4-6, 8, 13-16, and 26 have to be set. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 3 +++ arch/x86/kvm/vmx.c | 5 +++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index d989829d3304..bcbfade26d8d 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -51,6 +51,9 @@ #define CPU_BASED_MONITOR_EXITING 0x20000000 #define CPU_BASED_PAUSE_EXITING 0x40000000 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + /* * Definitions of Secondary Processor-Based VM-Execution Controls. */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e55e404b5dba..66365a009cff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2306,7 +2306,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); - nested_vmx_procbased_ctls_low = 0; + nested_vmx_procbased_ctls_low = CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR; nested_vmx_procbased_ctls_high &= CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING | CPU_BASED_USE_TSC_OFFSETING | @@ -2327,7 +2327,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) * can use it to avoid exits to L1 - even when L0 runs L2 * without MSR bitmaps. */ - nested_vmx_procbased_ctls_high |= CPU_BASED_USE_MSR_BITMAPS; + nested_vmx_procbased_ctls_high |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR | + CPU_BASED_USE_MSR_BITMAPS; /* We support free control of CR3 access interception. */ nested_vmx_true_procbased_ctls_low = nested_vmx_procbased_ctls_low & -- cgit v1.2.3 From 2996fca0690f03a5220203588f4a0d8c5acba2b0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:43 +0200 Subject: KVM: nVMX: Allow to disable VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS Allow L1 to "leak" its debug controls into L2, i.e. permit cleared VM_{ENTRY_LOAD,EXIT_SAVE}_DEBUG_CONTROLS. This requires to manually transfer the state of DR7 and IA32_DEBUGCTLMSR from L1 into L2 as both run on different VMCS. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 66365a009cff..b93e2ae2bb62 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -383,6 +383,9 @@ struct nested_vmx { struct hrtimer preemption_timer; bool preemption_timer_expired; + + /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ + u64 vmcs01_debugctl; }; #define POSTED_INTR_ON 0 @@ -2243,7 +2246,9 @@ static u32 nested_vmx_true_procbased_ctls_low; static u32 nested_vmx_secondary_ctls_low, nested_vmx_secondary_ctls_high; static u32 nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high; static u32 nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high; +static u32 nested_vmx_true_exit_ctls_low; static u32 nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high; +static u32 nested_vmx_true_entry_ctls_low; static u32 nested_vmx_misc_low, nested_vmx_misc_high; static u32 nested_vmx_ept_caps; static __init void nested_vmx_setup_ctls_msrs(void) @@ -2289,6 +2294,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; + /* We support free control of debug control saving. */ + nested_vmx_true_exit_ctls_low = nested_vmx_exit_ctls_low & + ~VM_EXIT_SAVE_DEBUG_CONTROLS; + /* entry controls */ rdmsr(MSR_IA32_VMX_ENTRY_CTLS, nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); @@ -2303,6 +2312,10 @@ static __init void nested_vmx_setup_ctls_msrs(void) if (vmx_mpx_supported()) nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; + /* We support free control of debug control loading. */ + nested_vmx_true_entry_ctls_low = nested_vmx_entry_ctls_low & + ~VM_ENTRY_LOAD_DEBUG_CONTROLS; + /* cpu-based controls */ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, nested_vmx_procbased_ctls_low, nested_vmx_procbased_ctls_high); @@ -2409,11 +2422,17 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) nested_vmx_procbased_ctls_high); break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high); + break; case MSR_IA32_VMX_EXIT_CTLS: *pdata = vmx_control_msr(nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high); break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + *pdata = vmx_control_msr(nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high); + break; case MSR_IA32_VMX_ENTRY_CTLS: *pdata = vmx_control_msr(nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high); @@ -7836,7 +7855,13 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); + } else { + kvm_set_dr(vcpu, 7, vcpu->arch.dr7); + vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); + } vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs12->vm_entry_intr_info_field); vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, @@ -7846,7 +7871,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, vmcs12->guest_interruptibility_info); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); - kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmx_set_rflags(vcpu, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, vmcs12->guest_pending_dbg_exceptions); @@ -8143,9 +8167,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !vmx_control_verify(vmcs12->pin_based_vm_exec_control, nested_vmx_pinbased_ctls_low, nested_vmx_pinbased_ctls_high) || !vmx_control_verify(vmcs12->vm_exit_controls, - nested_vmx_exit_ctls_low, nested_vmx_exit_ctls_high) || + nested_vmx_true_exit_ctls_low, + nested_vmx_exit_ctls_high) || !vmx_control_verify(vmcs12->vm_entry_controls, - nested_vmx_entry_ctls_low, nested_vmx_entry_ctls_high)) + nested_vmx_true_entry_ctls_low, + nested_vmx_entry_ctls_high)) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; @@ -8222,6 +8248,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + cpu = get_cpu(); vmx->loaded_vmcs = vmcs02; vmx_vcpu_put(vcpu); @@ -8399,7 +8428,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); - kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); vmcs12->guest_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); vmcs12->guest_rip = kvm_register_read(vcpu, VCPU_REGS_RIP); vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS); @@ -8478,9 +8506,13 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); + if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS) { + kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7); + vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + } + /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ - vmcs12->guest_ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT) vmcs12->guest_ia32_pat = vmcs_read64(GUEST_IA32_PAT); if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER) -- cgit v1.2.3 From 5381417f6a51293e7b8af1eb18aefa5d47976a71 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Jun 2014 13:59:44 +0200 Subject: KVM: nVMX: Fix returned value of MSR_IA32_VMX_VMCS_ENUM Many real CPUs get this wrong as well, but ours is totally off: bits 9:1 define the highest index value. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b93e2ae2bb62..a717c13b9466 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2461,7 +2461,7 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = -1ULL; break; case MSR_IA32_VMX_VMCS_ENUM: - *pdata = 0x1f; + *pdata = 0x2e; /* highest index: VMX_PREEMPTION_TIMER_VALUE */ break; case MSR_IA32_VMX_PROCBASED_CTLS2: *pdata = vmx_control_msr(nested_vmx_secondary_ctls_low, -- cgit v1.2.3 From 5777392e83c96e3a0799dd2985598e0fc76cf4aa Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:23 +0300 Subject: KVM: x86: check DR6/7 high-bits are clear only on long-mode When the guest sets DR6 and DR7, KVM asserts the high 32-bits are clear, and otherwise injects a #GP exception. This exception should only be injected only if running in long-mode. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 2 +- arch/x86/kvm/x86.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a717c13b9466..b362a1a38e51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5184,7 +5184,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; kvm_register_write(vcpu, reg, val); } else - if (kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg))) + if (kvm_set_dr(vcpu, dr, kvm_register_readl(vcpu, reg))) return 1; skip_emulated_instruction(vcpu); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 8c97bac9a895..c5b61a7eb144 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -47,6 +47,16 @@ static inline int is_long_mode(struct kvm_vcpu *vcpu) #endif } +static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) +{ + int cs_db, cs_l; + + if (!is_long_mode(vcpu)) + return false; + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); + return cs_l; +} + static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) { return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; @@ -108,6 +118,14 @@ static inline bool vcpu_match_mmio_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) return false; } +static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, + enum kvm_reg reg) +{ + unsigned long val = kvm_register_read(vcpu, reg); + + return is_64_bit_mode(vcpu) ? val : (u32)val; +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); -- cgit v1.2.3 From a449c7aa51e10c9bde0ea9bee4e682d6d067ebab Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:24 +0300 Subject: KVM: x86: Hypercall handling does not considers opsize correctly Currently, the hypercall handling routine only considers LME as an indication to whether the guest uses 32/64-bit mode. This is incosistent with hyperv hypercalls handling and against the common sense of considering cs.l as well. This patch uses is_64_bit_mode instead of is_long_mode for that matter. In addition, the result is masked in respect to the guest execution mode. Last, it changes kvm_hv_hypercall to use is_64_bit_mode as well to simplify the code. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 451d6acea808..874607ae0583 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5669,7 +5669,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; - int cs_db, cs_l; /* * hypercall generates UD from non zero cpl and real mode @@ -5680,8 +5679,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return 0; } - kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - longmode = is_long_mode(vcpu) && cs_l == 1; + longmode = is_64_bit_mode(vcpu); if (!longmode) { param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | @@ -5746,7 +5744,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; - int r = 1; + int op_64_bit, r = 1; if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -5759,7 +5757,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) trace_kvm_hypercall(nr, a0, a1, a2, a3); - if (!is_long_mode(vcpu)) { + op_64_bit = is_64_bit_mode(vcpu); + if (!op_64_bit) { nr &= 0xFFFFFFFF; a0 &= 0xFFFFFFFF; a1 &= 0xFFFFFFFF; @@ -5785,6 +5784,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) break; } out: + if (!op_64_bit) + ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); ++vcpu->stat.hypercalls; return r; -- cgit v1.2.3 From 1e32c07955b43e7f827174bf320ed35971117275 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:25 +0300 Subject: KVM: vmx: handle_cr ignores 32/64-bit mode On 32-bit mode only bits [31:0] of the CR should be used for setting the CR value. Otherwise, the host may incorrectly assume the value is invalid if bits [63:32] are not zero. Moreover, the CR is currently being read twice when CR8 is used. Last, nested mov-cr exiting is modified to handle the CR value correctly as well. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b362a1a38e51..45024bf0e229 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5058,7 +5058,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ - val = kvm_register_read(vcpu, reg); + val = kvm_register_readl(vcpu, reg); trace_kvm_cr_write(cr, val); switch (cr) { case 0: @@ -5075,7 +5075,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) return 1; case 8: { u8 cr8_prev = kvm_get_cr8(vcpu); - u8 cr8 = kvm_register_read(vcpu, reg); + u8 cr8 = (u8)val; err = kvm_set_cr8(vcpu, cr8); kvm_complete_insn_gp(vcpu, err); if (irqchip_in_kernel(vcpu->kvm)) @@ -6770,7 +6770,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); int cr = exit_qualification & 15; int reg = (exit_qualification >> 8) & 15; - unsigned long val = kvm_register_read(vcpu, reg); + unsigned long val = kvm_register_readl(vcpu, reg); switch ((exit_qualification >> 4) & 3) { case 0: /* mov to cr */ -- cgit v1.2.3 From 27e6fb5dae2819d17f38dc9224692b771e989981 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Wed, 18 Jun 2014 17:19:26 +0300 Subject: KVM: vmx: vmx instructions handling does not consider cs.l VMX instructions use 32-bit operands in 32-bit mode, and 64-bit operands in 64-bit mode. The current implementation is broken since it does not use the register operands correctly, and always uses 64-bit for reads and writes. Moreover, write to memory in vmwrite only considers long-mode, so it ignores cs.l. This patch fixes this behavior. Signed-off-by: Nadav Amit Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++------ arch/x86/kvm/x86.h | 9 +++++++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 45024bf0e229..8748c2e19ed6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6403,7 +6403,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* Decode instruction info and find the field to read */ - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); /* Read the field, zero-extended to a u64 field_value */ if (!vmcs12_read_any(vcpu, field, &field_value)) { nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6416,7 +6416,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) * on the guest's mode (32 or 64 bit), not on the given field's length. */ if (vmx_instruction_info & (1u << 10)) { - kvm_register_write(vcpu, (((vmx_instruction_info) >> 3) & 0xf), + kvm_register_writel(vcpu, (((vmx_instruction_info) >> 3) & 0xf), field_value); } else { if (get_vmx_mem_address(vcpu, exit_qualification, @@ -6453,21 +6453,21 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return 1; if (vmx_instruction_info & (1u << 10)) - field_value = kvm_register_read(vcpu, + field_value = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 3) & 0xf)); else { if (get_vmx_mem_address(vcpu, exit_qualification, vmx_instruction_info, &gva)) return 1; if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, - &field_value, (is_long_mode(vcpu) ? 8 : 4), &e)) { + &field_value, (is_64_bit_mode(vcpu) ? 8 : 4), &e)) { kvm_inject_page_fault(vcpu, &e); return 1; } } - field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); + field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf)); if (vmcs_field_readonly(field)) { nested_vmx_failValid(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT); @@ -6590,7 +6590,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) } vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO); - type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf); + type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf); types = (nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index c5b61a7eb144..306a1b77581f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -126,6 +126,15 @@ static inline unsigned long kvm_register_readl(struct kvm_vcpu *vcpu, return is_64_bit_mode(vcpu) ? val : (u32)val; } +static inline void kvm_register_writel(struct kvm_vcpu *vcpu, + enum kvm_reg reg, + unsigned long val) +{ + if (!is_64_bit_mode(vcpu)) + val = (u32)val; + return kvm_register_write(vcpu, reg, val); +} + void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); -- cgit v1.2.3 From d116e812f9026e3cca46ce1009e577afec62916d Mon Sep 17 00:00:00 2001 From: Deng-Cheng Zhu Date: Thu, 26 Jun 2014 12:11:34 -0700 Subject: MIPS: KVM: Reformat code and comments No logic changes inside. Signed-off-by: Deng-Cheng Zhu Reviewed-by: James Hogan Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 10 +- arch/mips/include/asm/r4kcache.h | 3 + arch/mips/kvm/kvm_locore.S | 55 +++--- arch/mips/kvm/kvm_mips.c | 179 +++++++++--------- arch/mips/kvm/kvm_mips_comm.h | 21 +- arch/mips/kvm/kvm_mips_commpage.c | 21 +- arch/mips/kvm/kvm_mips_dyntrans.c | 38 ++-- arch/mips/kvm/kvm_mips_emul.c | 389 +++++++++++++++++++------------------- arch/mips/kvm/kvm_mips_int.c | 45 +++-- arch/mips/kvm/kvm_mips_int.h | 22 ++- arch/mips/kvm/kvm_mips_opcode.h | 26 ++- arch/mips/kvm/kvm_mips_stats.c | 18 +- arch/mips/kvm/kvm_tlb.c | 194 +++++++++---------- arch/mips/kvm/kvm_trap_emul.c | 77 ++++---- arch/mips/kvm/trace.h | 18 +- 15 files changed, 564 insertions(+), 552 deletions(-) diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index b0aa95565752..3f813f295134 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -359,13 +359,17 @@ enum emulation_result { #define MIPS3_PG_FRAME 0x3fffffc0 #define VPN2_MASK 0xffffe000 -#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ +#define TLB_IS_GLOBAL(x) (((x).tlb_lo0 & MIPS3_PG_G) && \ ((x).tlb_lo1 & MIPS3_PG_G)) #define TLB_VPN2(x) ((x).tlb_hi & VPN2_MASK) #define TLB_ASID(x) ((x).tlb_hi & ASID_MASK) -#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ - ? ((x).tlb_lo1 & MIPS3_PG_V) \ +#define TLB_IS_VALID(x, va) (((va) & (1 << PAGE_SHIFT)) \ + ? ((x).tlb_lo1 & MIPS3_PG_V) \ : ((x).tlb_lo0 & MIPS3_PG_V)) +#define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ + ((y) & VPN2_MASK & ~(x).tlb_mask)) +#define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ + TLB_ASID(x) == ((y) & ASID_MASK)) struct kvm_mips_tlb { long tlb_mask; diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index 0b8bd28a0df1..4520adc8699b 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -19,6 +19,9 @@ #include #include /* for segment_eq() */ +extern void (*r4k_blast_dcache)(void); +extern void (*r4k_blast_icache)(void); + /* * This macro return a properly sign-extended address suitable as base address * for indexed cache operations. Two issues here: diff --git a/arch/mips/kvm/kvm_locore.S b/arch/mips/kvm/kvm_locore.S index 033ac343e72c..d7279c03c517 100644 --- a/arch/mips/kvm/kvm_locore.S +++ b/arch/mips/kvm/kvm_locore.S @@ -16,7 +16,6 @@ #include #include - #define _C_LABEL(x) x #define MIPSX(name) mips32_ ## name #define CALLFRAME_SIZ 32 @@ -91,7 +90,10 @@ FEXPORT(__kvm_mips_vcpu_run) LONG_S $24, PT_R24(k1) LONG_S $25, PT_R25(k1) - /* XXXKYMA k0/k1 not saved, not being used if we got here through an ioctl() */ + /* + * XXXKYMA k0/k1 not saved, not being used if we got here through + * an ioctl() + */ LONG_S $28, PT_R28(k1) LONG_S $29, PT_R29(k1) @@ -132,7 +134,10 @@ FEXPORT(__kvm_mips_vcpu_run) /* Save the kernel gp as well */ LONG_S gp, VCPU_HOST_GP(k1) - /* Setup status register for running the guest in UM, interrupts are disabled */ + /* + * Setup status register for running the guest in UM, interrupts + * are disabled + */ li k0, (ST0_EXL | KSU_USER | ST0_BEV) mtc0 k0, CP0_STATUS ehb @@ -152,7 +157,6 @@ FEXPORT(__kvm_mips_vcpu_run) mtc0 k0, CP0_STATUS ehb - /* Set Guest EPC */ LONG_L t0, VCPU_PC(k1) mtc0 t0, CP0_EPC @@ -165,7 +169,7 @@ FEXPORT(__kvm_mips_load_asid) INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: - /* t1: contains the base of the ASID array, need to get the cpu id */ + /* t1: contains the base of the ASID array, need to get the cpu id */ LONG_L t2, TI_CPU($28) /* smp_processor_id */ INT_SLL t2, t2, 2 /* x4 */ REG_ADDU t3, t1, t2 @@ -229,9 +233,7 @@ FEXPORT(__kvm_mips_load_k0k1) eret VECTOR(MIPSX(exception), unknown) -/* - * Find out what mode we came from and jump to the proper handler. - */ +/* Find out what mode we came from and jump to the proper handler. */ mtc0 k0, CP0_ERROREPC #01: Save guest k0 ehb #02: @@ -239,7 +241,8 @@ VECTOR(MIPSX(exception), unknown) INT_SRL k0, k0, 10 #03: Get rid of CPUNum INT_SLL k0, k0, 10 #04 LONG_S k1, 0x3000(k0) #05: Save k1 @ offset 0x3000 - INT_ADDIU k0, k0, 0x2000 #06: Exception handler is installed @ offset 0x2000 + INT_ADDIU k0, k0, 0x2000 #06: Exception handler is + # installed @ offset 0x2000 j k0 #07: jump to the function nop #08: branch delay slot VECTOR_END(MIPSX(exceptionEnd)) @@ -248,7 +251,6 @@ VECTOR_END(MIPSX(exceptionEnd)) /* * Generic Guest exception handler. We end up here when the guest * does something that causes a trap to kernel mode. - * */ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Get the VCPU pointer from DDTATA_LO */ @@ -290,9 +292,7 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_S $30, VCPU_R30(k1) LONG_S $31, VCPU_R31(k1) - /* We need to save hi/lo and restore them on - * the way out - */ + /* We need to save hi/lo and restore them on the way out */ mfhi t0 LONG_S t0, VCPU_HI(k1) @@ -321,8 +321,10 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Save pointer to run in s0, will be saved by the compiler */ move s0, a0 - /* Save Host level EPC, BadVaddr and Cause to VCPU, useful to - * process the exception */ + /* + * Save Host level EPC, BadVaddr and Cause to VCPU, useful to + * process the exception + */ mfc0 k0,CP0_EPC LONG_S k0, VCPU_PC(k1) @@ -351,7 +353,6 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_L k0, VCPU_HOST_EBASE(k1) mtc0 k0,CP0_EBASE - /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) @@ -369,7 +370,8 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Saved host state */ INT_ADDIU sp, sp, -PT_SIZE - /* XXXKYMA do we need to load the host ASID, maybe not because the + /* + * XXXKYMA do we need to load the host ASID, maybe not because the * kernel entries are marked GLOBAL, need to verify */ @@ -383,9 +385,11 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) /* Jump to handler */ FEXPORT(__kvm_mips_jump_to_handler) - /* XXXKYMA: not sure if this is safe, how large is the stack?? + /* + * XXXKYMA: not sure if this is safe, how large is the stack?? * Now jump to the kvm_mips_handle_exit() to see if we can deal - * with this in the kernel */ + * with this in the kernel + */ PTR_LA t9, kvm_mips_handle_exit jalr.hb t9 INT_ADDIU sp, sp, -CALLFRAME_SIZ /* BD Slot */ @@ -394,7 +398,8 @@ FEXPORT(__kvm_mips_jump_to_handler) di ehb - /* XXXKYMA: k0/k1 could have been blown away if we processed + /* + * XXXKYMA: k0/k1 could have been blown away if we processed * an exception while we were handling the exception from the * guest, reload k1 */ @@ -402,7 +407,8 @@ FEXPORT(__kvm_mips_jump_to_handler) move k1, s1 INT_ADDIU k1, k1, VCPU_HOST_ARCH - /* Check return value, should tell us if we are returning to the + /* + * Check return value, should tell us if we are returning to the * host (handle I/O etc)or resuming the guest */ andi t0, v0, RESUME_HOST @@ -521,8 +527,10 @@ __kvm_mips_return_to_host: LONG_L $0, PT_R0(k1) LONG_L $1, PT_R1(k1) - /* r2/v0 is the return code, shift it down by 2 (arithmetic) - * to recover the err code */ + /* + * r2/v0 is the return code, shift it down by 2 (arithmetic) + * to recover the err code + */ INT_SRA k0, v0, 2 move $2, k0 @@ -566,7 +574,6 @@ __kvm_mips_return_to_host: PTR_LI k0, 0x2000000F mtc0 k0, CP0_HWRENA - /* Restore RA, which is the address we will return to */ LONG_L ra, PT_R31(k1) j ra diff --git a/arch/mips/kvm/kvm_mips.c b/arch/mips/kvm/kvm_mips.c index cd5e4f568439..52be52adf030 100644 --- a/arch/mips/kvm/kvm_mips.c +++ b/arch/mips/kvm/kvm_mips.c @@ -7,7 +7,7 @@ * * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved. * Authors: Sanjay Lal -*/ + */ #include #include @@ -31,38 +31,41 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) struct kvm_stats_debugfs_item debugfs_entries[] = { - { "wait", VCPU_STAT(wait_exits) }, - { "cache", VCPU_STAT(cache_exits) }, - { "signal", VCPU_STAT(signal_exits) }, - { "interrupt", VCPU_STAT(int_exits) }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits) }, - { "tlbmod", VCPU_STAT(tlbmod_exits) }, - { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits) }, - { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits) }, - { "addrerr_st", VCPU_STAT(addrerr_st_exits) }, - { "addrerr_ld", VCPU_STAT(addrerr_ld_exits) }, - { "syscall", VCPU_STAT(syscall_exits) }, - { "resvd_inst", VCPU_STAT(resvd_inst_exits) }, - { "break_inst", VCPU_STAT(break_inst_exits) }, - { "flush_dcache", VCPU_STAT(flush_dcache_exits) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, + { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, + { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, + { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, + { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, + { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, + { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, + { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, + { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, + { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, + { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, + { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, + { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, + { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, {NULL} }; static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) { int i; + for_each_possible_cpu(i) { vcpu->arch.guest_kernel_asid[i] = 0; vcpu->arch.guest_user_asid[i] = 0; } + return 0; } -/* XX