summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-13 09:47:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-13 09:47:01 -0700
commit900360131066f192c82311a098d03d6ac6429e20 (patch)
treee9681537a2d1f75fa5be21d8f1116f9f0ba8a391 /arch/x86/kvm
parent4541fec3104bef0c60633f9e180be94ea5ccc2b7 (diff)
parentca3f0874723fad81d0c701b63ae3a17a408d5f25 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini: "First batch of KVM changes for 4.1 The most interesting bit here is irqfd/ioeventfd support for ARM and ARM64. Summary: ARM/ARM64: fixes for live migration, irqfd and ioeventfd support (enabling vhost, too), page aging s390: interrupt handling rework, allowing to inject all local interrupts via new ioctl and to get/set the full local irq state for migration and introspection. New ioctls to access memory by virtual address, and to get/set the guest storage keys. SIMD support. MIPS: FPU and MIPS SIMD Architecture (MSA) support. Includes some patches from Ralf Baechle's MIPS tree. x86: bugfixes (notably for pvclock, the others are small) and cleanups. Another small latency improvement for the TSC deadline timer" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (146 commits) KVM: use slowpath for cross page cached accesses kvm: mmu: lazy collapse small sptes into large sptes KVM: x86: Clear CR2 on VCPU reset KVM: x86: DR0-DR3 are not clear on reset KVM: x86: BSP in MSR_IA32_APICBASE is writable KVM: x86: simplify kvm_apic_map KVM: x86: avoid logical_map when it is invalid KVM: x86: fix mixed APIC mode broadcast KVM: x86: use MDA for interrupt matching kvm/ppc/mpic: drop unused IRQ_testbit KVM: nVMX: remove unnecessary double caching of MAXPHYADDR KVM: nVMX: checks for address bits beyond MAXPHYADDR on VM-entry KVM: x86: cache maxphyaddr CPUID leaf in struct kvm_vcpu KVM: vmx: pass error code with internal error #2 x86: vdso: fix pvclock races with task migration KVM: remove kvm_read_hva and kvm_read_hva_atomic KVM: x86: optimize delivery of TSC deadline timer interrupt KVM: x86: extract blocking logic from __vcpu_run kvm: x86: fix x86 eflags fixed bit KVM: s390: migrate vcpu interrupt state ...
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/cpuid.c33
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c193
-rw-r--r--arch/x86/kvm/i8254.c14
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/i8259.c12
-rw-r--r--arch/x86/kvm/ioapic.c22
-rw-r--r--arch/x86/kvm/ioapic.h11
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/lapic.c147
-rw-r--r--arch/x86/kvm/lapic.h17
-rw-r--r--arch/x86/kvm/mmu.c73
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c43
-rw-r--r--arch/x86/kvm/vmx.c146
-rw-r--r--arch/x86/kvm/x86.c157
17 files changed, 528 insertions, 356 deletions
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 08f790dfadc9..16e8f962eaad 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,5 +1,5 @@
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a80737ee6e6..59b69f6a2844 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
((best->eax & 0xff00) >> 8) != 0)
return -EINVAL;
+ /* Update physical-address width */
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
kvm_pmu_cpuid_update(vcpu);
return 0;
}
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
}
}
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+ if (!best || best->eax < 0x80000008)
+ goto not_found;
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best)
+ return best->eax & 0xff;
+not_found:
+ return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
- if (!best || best->eax < 0x80000008)
- goto not_found;
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best)
- return best->eax & 0xff;
-not_found:
- return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
/*
* If no match is found, check whether we exceed the vCPU's limit
* and return the content of the highest valid _standard_ leaf instead.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 4452eedfaedd..c3b1ad9fca81 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 __user *entries);
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.maxphyaddr;
+}
static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
if (!static_cpu_has(X86_FEATURE_XSAVE))
- return 0;
+ return false;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 106c01557f2b..630bcb0d7a04 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -248,27 +248,7 @@ struct mode_dual {
struct opcode mode64;
};
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
enum x86_transfer_type {
X86_TRANSFER_NONE,
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
* These EFLAGS bits are restored from saved value during emulation, and
* any changes are written back to the saved value after emulation.
*/
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+ X86_EFLAGS_PF|X86_EFLAGS_CF)
#ifdef CONFIG_X86_64
#define ON64(x) x
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
*dest = (*dest & ~mask) | (src & mask);
}
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+ switch (bytes) {
+ case 1:
+ *(u8 *)reg = (u8)val;
+ break;
+ case 2:
+ *(u16 *)reg = (u16)val;
+ break;
+ case 4:
+ *reg = (u32)val;
+ break; /* 64b: zero-extend */
+ case 8:
+ *reg = val;
+ break;
+ }
+}
+
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
return (1UL << (ctxt->ad_bytes << 3)) - 1;
@@ -943,6 +943,22 @@ FASTOP2(xadd);
FASTOP2R(cmp, cmp_r);
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+ /* If src is zero, do not writeback, but update flags */
+ if (ctxt->src.val == 0)
+ ctxt->dst.type = OP_NONE;
+ return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+ /* If src is zero, do not writeback, but update flags */
+ if (ctxt->src.val == 0)
+ ctxt->dst.type = OP_NONE;
+ return fastop(ctxt, em_bsr);
+}
+
static u8 test_cc(unsigned int condition, unsigned long flags)
{
u8 rc;
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int in_page, n;
unsigned int count = ctxt->rep_prefix ?
address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
- in_page = (ctxt->eflags & EFLG_DF) ?
+ in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
}
if (ctxt->rep_prefix && (ctxt->d & String) &&
- !(ctxt->eflags & EFLG_DF)) {
+ !(ctxt->eflags & X86_EFLAGS_DF)) {
ctxt->dst.data = rc->data + rc->pos;
ctxt->dst.type = OP_MEM_STR;
ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
static void write_register_operand(struct operand *op)
{
- /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
- switch (op->bytes) {
- case 1:
- *(u8 *)op->addr.reg = (u8)op->val;
- break;
- case 2:
- *(u16 *)op->addr.reg = (u16)op->val;
- break;
- case 4:
- *op->addr.reg = (u32)op->val;
- break; /* 64b: zero-extend */
- case 8:
- *op->addr.reg = op->val;
- break;
- }
+ return assign_register(op->addr.reg, op->val, op->bytes);
}
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
{
int rc;
unsigned long val, change_mask;
- int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
int cpl = ctxt->ops->cpl(ctxt);
rc = emulate_pop(ctxt, &val, len);
if (rc != X86EMUL_CONTINUE)
return rc;
- change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
- | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+ change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+ X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+ X86_EFLAGS_AC | X86_EFLAGS_ID;
switch(ctxt->mode) {
case X86EMUL_MODE_PROT64:
case X86EMUL_MODE_PROT32:
case X86EMUL_MODE_PROT16:
if (cpl == 0)
- change_mask |= EFLG_IOPL;
+ change_mask |= X86_EFLAGS_IOPL;
if (cpl <= iopl)
- change_mask |= EFLG_IF;
+ change_mask |= X86_EFLAGS_IF;
break;
case X86EMUL_MODE_VM86:
if (iopl < 3)
return emulate_gp(ctxt, 0);
- change_mask |= EFLG_IF;
+ change_mask |= X86_EFLAGS_IF;
break;
default: /* real mode */
- change_mask |= (EFLG_IOPL | EFLG_IF);
+ change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
break;
}
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
- ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+ ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
return em_push(ctxt);
}
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
{
int rc = X86EMUL_CONTINUE;
int reg = VCPU_REGS_RDI;
+ u32 val;
while (reg >= VCPU_REGS_RAX) {
if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
--reg;
}
- rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
break;
+ assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
--reg;
}
return rc;
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+ ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
unsigned long temp_eip = 0;
unsigned long temp_eflags = 0;
unsigned long cs = 0;
- unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
- EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
- EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
- unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+ unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+ X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+ X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+ X86_EFLAGS_AC | X86_EFLAGS_ID |
+ X86_EFLAGS_FIXED;
+ unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+ X86_EFLAGS_VIP;
/* TODO: Add stack limit check */
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
ctxt->_eip = temp_eip;
-
if (ctxt->op_bytes == 4)
ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
}
ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
- ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+ ctxt->eflags |= X86_EFLAGS_FIXED;
ctxt->ops->set_nmi_mask(ctxt, false);
return rc;
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
- ctxt->eflags &= ~EFLG_ZF;
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
} else {
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
(u32) reg_read(ctxt, VCPU_REGS_RBX);
- ctxt->eflags |= EFLG_ZF;
+ ctxt->eflags |= X86_EFLAGS_ZF;
}
return X86EMUL_CONTINUE;
}
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
ctxt->src.val = ctxt->dst.orig_val;
fastop(ctxt, em_cmp);
- if (ctxt->eflags & EFLG_ZF) {
+ if (ctxt->eflags & X86_EFLAGS_ZF) {
/* Success: write back to memory; no update of EAX */
ctxt->src.type = OP_NONE;
ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
ctxt->eflags &= ~msr_data;
- ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+ ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
} else {
/* legacy mode */
ops->get_msr(ctxt, MSR_STAR, &msr_data);
ctxt->_eip = (u32)msr_data;
- ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+ ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
if ((msr_data & 0xfffc) == 0x0)
return emulate_gp(ctxt, 0);
- ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
- cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+ ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+ cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
ss_sel = cs_sel + 8;
if (efer & EFER_LMA) {
cs.d = 0;
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
return emulate_gp(ctxt, 0);
break;
}
- cs_sel |= SELECTOR_RPL_MASK;
- ss_sel |= SELECTOR_RPL_MASK;
+ cs_sel |= SEGMENT_RPL_MASK;
+ ss_sel |= SEGMENT_RPL_MASK;
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
return false;
if (ctxt->mode == X86EMUL_MODE_VM86)
return true;
- iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
return ctxt->ops->cpl(ctxt) > iopl;
}
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
return ret;
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
X86_TRANSFER_TASK_SWITCH, NULL);
- if (ret != X86EMUL_CONTINUE)
- return ret;
- return X86EMUL_CONTINUE;
+ return ret;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
struct operand *op)
{
- int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+ int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
register_address_increment(ctxt, reg, df * op->bytes);
op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
{
int rc = ctxt->ops->fix_hypercall(ctxt);
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
return em_lgdt_lidt(ctxt, true);
}
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
- int rc;
-
- rc = ctxt->ops->fix_hypercall(ctxt);
-
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- return rc;
-}
-
static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
u32 flags;
- flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+ flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+ X86_EFLAGS_SF;
flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
static const struct opcode group7_rm0[] = {
N,
- I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ I(SrcNone | Priv | EmulateOnUD, em_hypercall),
N, N, N, N, N, N,
};
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = {
static const struct opcode group7_rm3[] = {
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
- II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall),
+ II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
DIP(SrcNone | Prot | Priv, stgi, check_svme),
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = {
N, N,
G(BitOp, group8),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
- F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+ I(DstReg | SrcMem | ModRM, em_bsf_c),
+ I(DstReg | SrcMem | ModRM, em_bsr_c),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xC0 - 0xC7 */
F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
(ctxt->b == 0xae) || (ctxt->b == 0xaf))
&& (((ctxt->rep_prefix == REPE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == 0))
+ ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+ ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
return true;
return false;
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
/* All REP prefixes have the same first termination condition */
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
ctxt->eip = ctxt->_eip;
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
goto done;
}
}
@@ -4963,9 +4961,9 @@ special_insn:
}
if (ctxt->rep_prefix && (ctxt->d & String))
- ctxt->eflags |= EFLG_RF;
+ ctxt->eflags |= X86_EFLAGS_RF;
else
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
if (ctxt->execute) {
if (ctxt->d & Fastop) {
@@ -5014,7 +5012,7 @@ special_insn:
rc = emulate_int(ctxt, ctxt->src.val);
break;
case 0xce: /* into */
- if (ctxt->eflags & EFLG_OF)
+ if (ctxt->eflags & X86_EFLAGS_OF)
rc = emulate_int(ctxt, 4);
break;
case 0xe9: /* jmp rel */
@@ -5027,19 +5025,19 @@ special_insn:
break;
case 0xf5: /* cmc */
/* complement carry flag from eflags reg */
- ctxt->eflags ^= EFLG_CF;
+ ctxt->eflags ^= X86_EFLAGS_CF;
break;
case 0xf8: /* clc */
- ctxt->eflags &= ~EFLG_CF;
+ ctxt->eflags &= ~X86_EFLAGS_CF;
break;
case 0xf9: /* stc */
- ctxt->eflags |= EFLG_CF;
+ ctxt->eflags |= X86_EFLAGS_CF;
break;
case 0xfc: /* cld */
- ctxt->eflags &= ~EFLG_DF;
+ ctxt->eflags &= ~X86_EFLAGS_DF;
break;
case 0xfd: /* std */
- ctxt->eflags |= EFLG_DF;
+ ctxt->eflags |= X86_EFLAGS_DF;
break;
default:
goto cannot_emulate;
@@ -5100,7 +5098,7 @@ writeback:
}
goto done; /* skip rip writeback */
}
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
}
ctxt->eip = ctxt->_eip;
@@ -5137,8 +5135,7 @@ twobyte_insn:
case 0x40 ... 0x4f: /* cmov */
if (test_cc(ctxt->b, ctxt->eflags))
ctxt->dst.val = ctxt->src.val;
- else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
- ctxt->op_bytes != 4)
+ else if (ctxt->op_bytes != 4)
ctxt->dst.type = OP_NONE; /* no writeback */
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 298781d4cfb4..4dce6f8b6129 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
return 0;
}
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
return 0;
}
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this,
return 0;
}
-static int speaker_ioport_read(struct kvm_io_device *this,
- gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b611b0..c84990b42b5b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,7 +3,7 @@
#include <linux/kthread.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 9541ba34126b..fef922ff2635 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -529,42 +529,42 @@ static int picdev_read(struct kvm_pic *s,
return 0;
}
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
addr, len, val);
}
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 46d4449772bc..28146f03c514 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
old_irr = ioapic->irr;
ioapic->irr |= mask;
+ if (edge)
+ ioapic->irr_delivered &= ~mask;
if ((edge && old_irr == ioapic->irr) ||
(!edge && entry.fields.remote_irr)) {
ret = 0;
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.shorthand = 0;
if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
- ioapic->irr &= ~(1 << irq);
+ ioapic->irr_delivered |= 1 << irq;
if (irq == RTC_GSI && line_status) {
/*
@@ -473,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
}
}
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- smp_rmb();
- return test_bit(vector, ioapic->handled_vectors);
-}
-
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -500,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
(addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
- void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+ gpa_t addr, int len, void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 result;
@@ -543,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
return 0;
}
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
- const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+ gpa_t addr, int len, const void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 data;
@@ -599,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->ioregsel = 0;
ioapic->irr = 0;
+ ioapic->irr_delivered = 0;
ioapic->id = 0;
memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
rtc_irq_eoi_tracking_reset(ioapic);
@@ -656,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+ state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
return 0;
}
@@ -669,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
+ ioapic->irr_delivered = 0;
update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index c2e36d934af4..ca0b0b4e6256 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,7 +3,7 @@
#include <linux/kvm_host.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
struct kvm;
struct kvm_vcpu;
@@ -77,6 +77,7 @@ struct kvm_ioapic {
struct rtc_status rtc_status;
struct delayed_work eoi_inject;
u32 irq_eoi[IOAPIC_NUM_PINS];
+ u32 irr_delivered;
};
#ifdef DEBUG
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
}
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ smp_rmb();
+ return test_bit(vector, ioapic->handled_vectors);
+}
+
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2d03568e9498..ad68c73008c5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -27,7 +27,7 @@
#include <linux/kvm_host.h>
#include <linux/spinlock.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
#include "ioapic.h"
#include "lapic.h"
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 4ee827d7bf36..d67206a7b99a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time. (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+ return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+ unsigned lid_bits;
+
+ BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4);
+ BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8);
+ BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16);
+ lid_bits = map->mode;
+
+ *cid = dest_id >> lid_bits;
+ *lid = dest_id & ((1 << lid_bits) - 1);
+}
+
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm)
if (!new)
goto out;
- new->ldr_bits = 8;
- /* flat mode is default */
- new->cid_shift = 8;
- new->cid_mask = 0;
- new->lid_mask = 0xff;
- new->broadcast = APIC_BROADCAST;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!kvm_apic_present(vcpu))
- continue;
-
- if (apic_x2apic_mode(apic)) {
- new->ldr_bits = 32;
- new->cid_shift = 16;
- new->cid_mask = new->lid_mask = 0xffff;
- new->broadcast = X2APIC_BROADCAST;
- } else if (kvm_apic