From e76779339ba9c3d56f1a39ff0bfb1dfe7614b062 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 11 May 2011 08:30:51 -0400 Subject: KVM: Document KVM_GET_LAPIC, KVM_SET_LAPIC ioctl Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- Documentation/virtual/kvm/api.txt | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 42542eb802ca..67cc0f5b9974 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1291,6 +1291,38 @@ Returns the tsc frequency of the guest. The unit of the return value is KHz. If the host has unstable tsc this ioctl returns -EIO instead as an error. +4.56 KVM_GET_LAPIC + +Capability: KVM_CAP_IRQCHIP +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_lapic_state (out) +Returns: 0 on success, -1 on error + +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + +Reads the Local APIC registers and copies them into the input argument. The +data format and layout are the same as documented in the architecture manual. + +4.57 KVM_SET_LAPIC + +Capability: KVM_CAP_IRQCHIP +Architectures: x86 +Type: vcpu ioctl +Parameters: struct kvm_lapic_state (in) +Returns: 0 on success, -1 on error + +#define KVM_APIC_REG_SIZE 0x400 +struct kvm_lapic_state { + char regs[KVM_APIC_REG_SIZE]; +}; + +Copies the input argument into the the Local APIC registers. The data format +and layout are the same as documented in the architecture manual. + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by -- cgit v1.2.3 From 67cbc90db5c0f03aa5e54204c388403ec8c25862 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sun, 15 May 2011 00:54:58 +0900 Subject: KVM: x86 emulator: Place insn_fetch helpers together The two macros need special care to use: Assume rc, ctxt, ops and done exist outside of them. Can goto outside. Considering the fact that these are used only in decode functions, moving these right after do_insn_fetch() seems to be a right thing to improve the readability. We also rename do_fetch_insn_byte() to do_insn_fetch_byte() to be consistent. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index adc98675cda0..6e4722c3dcdb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -407,23 +407,6 @@ struct gprefix { } \ } while (0) -/* Fetch next part of the instruction being emulated. */ -#define insn_fetch(_type, _size, _eip) \ -({ unsigned long _x; \ - rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ - if (rc != X86EMUL_CONTINUE) \ - goto done; \ - (_eip) += (_size); \ - (_type)_x; \ -}) - -#define insn_fetch_arr(_arr, _size, _eip) \ -({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ - if (rc != X86EMUL_CONTINUE) \ - goto done; \ - (_eip) += (_size); \ -}) - static int emulator_check_intercept(struct x86_emulate_ctxt *ctxt, enum x86_intercept intercept, enum x86_intercept_stage stage) @@ -671,7 +654,7 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, return ctxt->ops->read_std(ctxt, linear, data, size, &ctxt->exception); } -static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, +static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) { @@ -707,13 +690,30 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, if (eip + size - ctxt->eip > 15) return X86EMUL_UNHANDLEABLE; while (size--) { - rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); + rc = do_insn_fetch_byte(ctxt, ops, eip++, dest++); if (rc != X86EMUL_CONTINUE) return rc; } return X86EMUL_CONTINUE; } +/* Fetch next part of the instruction being emulated. */ +#define insn_fetch(_type, _size, _eip) \ +({ unsigned long _x; \ + rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ + if (rc != X86EMUL_CONTINUE) \ + goto done; \ + (_eip) += (_size); \ + (_type)_x; \ +}) + +#define insn_fetch_arr(_arr, _size, _eip) \ +({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ + if (rc != X86EMUL_CONTINUE) \ + goto done; \ + (_eip) += (_size); \ +}) + /* * Given the 'reg' portion of a ModRM byte, and a register block, return a * pointer into the block that addresses the relevant register. -- cgit v1.2.3 From ef5d75cc9af2bca7c525158666b5f9696846ffb6 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sun, 15 May 2011 00:57:43 +0900 Subject: KVM: x86 emulator: Stop passing ctxt->ops as arg of decode helpers Dereference it in the actual users: only do_insn_fetch_byte(). This is consistent with the way __linearize() dereferences it. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 6e4722c3dcdb..df9082c811e0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -655,7 +655,6 @@ static int segmented_read_std(struct x86_emulate_ctxt *ctxt, } static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, unsigned long eip, u8 *dest) { struct fetch_cache *fc = &ctxt->decode.fetch; @@ -670,8 +669,8 @@ static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, rc = __linearize(ctxt, addr, size, false, true, &linear); if (rc != X86EMUL_CONTINUE) return rc; - rc = ops->fetch(ctxt, linear, fc->data + cur_size, - size, &ctxt->exception); + rc = ctxt->ops->fetch(ctxt, linear, fc->data + cur_size, + size, &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; fc->end += size; @@ -681,7 +680,6 @@ static int do_insn_fetch_byte(struct x86_emulate_ctxt *ctxt, } static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, unsigned long eip, void *dest, unsigned size) { int rc; @@ -690,7 +688,7 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, if (eip + size - ctxt->eip > 15) return X86EMUL_UNHANDLEABLE; while (size--) { - rc = do_insn_fetch_byte(ctxt, ops, eip++, dest++); + rc = do_insn_fetch_byte(ctxt, eip++, dest++); if (rc != X86EMUL_CONTINUE) return rc; } @@ -700,7 +698,7 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, /* Fetch next part of the instruction being emulated. */ #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ - rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ + rc = do_insn_fetch(ctxt, (_eip), &_x, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ @@ -708,7 +706,7 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, }) #define insn_fetch_arr(_arr, _size, _eip) \ -({ rc = do_insn_fetch(ctxt, ops, (_eip), _arr, (_size)); \ +({ rc = do_insn_fetch(ctxt, (_eip), _arr, (_size)); \ if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ @@ -887,7 +885,6 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, } static int decode_modrm(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct operand *op) { struct decode_cache *c = &ctxt->decode; @@ -1014,7 +1011,6 @@ done: } static int decode_abs(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct operand *op) { struct decode_cache *c = &ctxt->decode; @@ -3327,7 +3323,6 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, unsigned size, bool sign_extension) { struct decode_cache *c = &ctxt->decode; - struct x86_emulate_ops *ops = ctxt->ops; int rc = X86EMUL_CONTINUE; op->type = OP_IMM; @@ -3362,10 +3357,8 @@ done: return rc; } -int -x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) +int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) { - struct x86_emulate_ops *ops = ctxt->ops; struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; @@ -3531,11 +3524,11 @@ done_prefixes: /* ModRM and SIB bytes. */ if (c->d & ModRM) { - rc = decode_modrm(ctxt, ops, &memop); + rc = decode_modrm(ctxt, &memop); if (!c->has_seg_override) set_seg_override(c, c->modrm_seg); } else if (c->d & MemAbs) - rc = decode_abs(ctxt, ops, &memop); + rc = decode_abs(ctxt, &memop); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.2.3 From 7b105ca2903b84f023c49965d9a511c5e55256dc Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Sun, 15 May 2011 01:00:52 +0900 Subject: KVM: x86 emulator: Stop passing ctxt->ops as arg of emul functions Dereference it in the actual users. This not only cleans up the emulator but also makes it easy to convert the old emulation functions to the new em_xxx() form later. Note: Remove some inline keywords to let the compiler decide inlining. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_emulate.h | 3 +- arch/x86/kvm/emulate.c | 259 +++++++++++++++++-------------------- arch/x86/kvm/x86.c | 2 +- 3 files changed, 118 insertions(+), 146 deletions(-) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 0049211959c0..ab09ba290db3 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -373,6 +373,5 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt); int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, bool has_error_code, u32 error_code); -int emulate_int_real(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int irq); +int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index df9082c811e0..d3f4466cd19c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -475,13 +475,12 @@ static void set_seg_override(struct decode_cache *c, int seg) c->seg_override = seg; } -static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) { if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) return 0; - return ops->get_cached_segment_base(ctxt, seg); + return ctxt->ops->get_cached_segment_base(ctxt, seg); } static unsigned seg_override(struct x86_emulate_ctxt *ctxt, @@ -570,7 +569,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, u16 sel; unsigned cpl, rpl; - la = seg_base(ctxt, ctxt->ops, addr.seg) + addr.ea; + la = seg_base(ctxt, addr.seg) + addr.ea; switch (ctxt->mode) { case X86EMUL_MODE_REAL: break; @@ -1052,7 +1051,6 @@ static void fetch_bit_operand(struct decode_cache *c) } static int read_emulated(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, unsigned long addr, void *dest, unsigned size) { int rc; @@ -1064,8 +1062,8 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, if (mc->pos < mc->end) goto read_cached; - rc = ops->read_emulated(ctxt, addr, mc->data + mc->end, n, - &ctxt->exception); + rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n, + &ctxt->exception); if (rc != X86EMUL_CONTINUE) return rc; mc->end += n; @@ -1090,7 +1088,7 @@ static int segmented_read(struct x86_emulate_ctxt *ctxt, rc = linearize(ctxt, addr, size, false, &linear); if (rc != X86EMUL_CONTINUE) return rc; - return read_emulated(ctxt, ctxt->ops, linear, data, size); + return read_emulated(ctxt, linear, data, size); } static int segmented_write(struct x86_emulate_ctxt *ctxt, @@ -1124,7 +1122,6 @@ static int segmented_cmpxchg(struct x86_emulate_ctxt *ctxt, } static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, unsigned int size, unsigned short port, void *dest) { @@ -1143,7 +1140,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, if (n == 0) n = 1; rc->pos = rc->end = 0; - if (!ops->pio_in_emulated(ctxt, size, port, rc->data, n)) + if (!ctxt->ops->pio_in_emulated(ctxt, size, port, rc->data, n)) return 0; rc->end = n * size; } @@ -1154,9 +1151,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, } static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 selector, struct desc_ptr *dt) { + struct x86_emulate_ops *ops = ctxt->ops; + if (selector & 1 << 2) { struct desc_struct desc; u16 sel; @@ -1173,48 +1171,42 @@ static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, /* allowed just for 8 bytes segments */ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 selector, struct desc_struct *desc) { struct desc_ptr dt; u16 index = selector >> 3; - int ret; ulong addr; - get_descriptor_table_ptr(ctxt, ops, selector, &dt); + get_descriptor_table_ptr(ctxt, selector, &dt); if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); - addr = dt.address + index * 8; - ret = ops->read_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); - return ret; + addr = dt.address + index * 8; + return ctxt->ops->read_std(ctxt, addr, desc, sizeof *desc, + &ctxt->exception); } /* allowed just for 8 bytes segments */ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 selector, struct desc_struct *desc) { struct desc_ptr dt; u16 index = selector >> 3; ulong addr; - int ret; - get_descriptor_table_ptr(ctxt, ops, selector, &dt); + get_descriptor_table_ptr(ctxt, selector, &dt); if (dt.size < index * 8 + 7) return emulate_gp(ctxt, selector & 0xfffc); addr = dt.address + index * 8; - ret = ops->write_std(ctxt, addr, desc, sizeof *desc, &ctxt->exception); - - return ret; + return ctxt->ops->write_std(ctxt, addr, desc, sizeof *desc, + &ctxt->exception); } /* Does not support long mode */ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 selector, int seg) { struct desc_struct seg_desc; @@ -1249,7 +1241,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (null_selector) /* for NULL selector skip all following checks */ goto load; - ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); + ret = read_segment_descriptor(ctxt, selector, &seg_desc); if (ret != X86EMUL_CONTINUE) return ret; @@ -1267,7 +1259,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, rpl = selector & 3; dpl = seg_desc.dpl; - cpl = ops->cpl(ctxt); + cpl = ctxt->ops->cpl(ctxt); switch (seg) { case VCPU_SREG_SS: @@ -1318,12 +1310,12 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (seg_desc.s) { /* mark segment as accessed */ seg_desc.type |= 1; - ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); + ret = write_segment_descriptor(ctxt, selector, &seg_desc); if (ret != X86EMUL_CONTINUE) return ret; } load: - ops->set_segment(ctxt, selector, &seg_desc, 0, seg); + ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg); return X86EMUL_CONTINUE; exception: emulate_exception(ctxt, err_vec, err_code, true); @@ -1424,13 +1416,12 @@ static int em_pop(struct x86_emulate_ctxt *ctxt) } static int emulate_popf(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - void *dest, int len) + void *dest, int len) { int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = ops->cpl(ctxt); + int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) @@ -1471,11 +1462,10 @@ static int em_popf(struct x86_emulate_ctxt *ctxt) c->dst.type = OP_REG; c->dst.addr.reg = &ctxt->eflags; c->dst.bytes = c->op_bytes; - return emulate_popf(ctxt, ctxt->ops, &c->dst.val, c->op_bytes); + return emulate_popf(ctxt, &c->dst.val, c->op_bytes); } -static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg) { struct decode_cache *c = &ctxt->decode; @@ -1484,8 +1474,7 @@ static int emulate_push_sreg(struct x86_emulate_ctxt *ctxt, return em_push(ctxt); } -static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int seg) { struct decode_cache *c = &ctxt->decode; unsigned long selector; @@ -1495,7 +1484,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); + rc = load_segment_descriptor(ctxt, (u16)selector, seg); return rc; } @@ -1549,10 +1538,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) return rc; } -int emulate_int_real(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int irq) +int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) { struct decode_cache *c = &ctxt->decode; + struct x86_emulate_ops *ops = ctxt->ops; int rc; struct desc_ptr dt; gva_t cs_addr; @@ -1590,7 +1579,7 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - rc = load_segment_descriptor(ctxt, ops, cs, VCPU_SREG_CS); + rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS); if (rc != X86EMUL_CONTINUE) return rc; @@ -1599,12 +1588,11 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, return rc; } -static int emulate_int(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int irq) +static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq) { switch(ctxt->mode) { case X86EMUL_MODE_REAL: - return emulate_int_real(ctxt, ops, irq); + return emulate_int_real(ctxt, irq); case X86EMUL_MODE_VM86: case X86EMUL_MODE_PROT16: case X86EMUL_MODE_PROT32: @@ -1615,8 +1603,7 @@ static int emulate_int(struct x86_emulate_ctxt *ctxt, } } -static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc = X86EMUL_CONTINUE; @@ -1648,7 +1635,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, if (rc != X86EMUL_CONTINUE) return rc; - rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); + rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); if (rc != X86EMUL_CONTINUE) return rc; @@ -1669,12 +1656,11 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt, return rc; } -static inline int emulate_iret(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops* ops) +static int emulate_iret(struct x86_emulate_ctxt *ctxt) { switch(ctxt->mode) { case X86EMUL_MODE_REAL: - return emulate_iret_real(ctxt, ops); + return emulate_iret_real(ctxt); case X86EMUL_MODE_VM86: case X86EMUL_MODE_PROT16: case X86EMUL_MODE_PROT32: @@ -1693,7 +1679,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) memcpy(&sel, c->src.valptr + c->op_bytes, 2); - rc = load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS); + rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS); if (rc != X86EMUL_CONTINUE) return rc; @@ -1830,8 +1816,7 @@ static int em_grp9(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static int emulate_ret_far(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; int rc; @@ -1845,12 +1830,11 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, rc = emulate_pop(ctxt, &cs, c->op_bytes); if (rc != X86EMUL_CONTINUE) return rc; - rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); + rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS); return rc; } -static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, int seg) +static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, int seg) { struct decode_cache *c = &ctxt->decode; unsigned short sel; @@ -1858,7 +1842,7 @@ static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, memcpy(&sel, c->src.valptr + c->op_bytes, 2); - rc = load_segment_descriptor(ctxt, ops, sel, seg); + rc = load_segment_descriptor(ctxt, sel, seg); if (rc != X86EMUL_CONTINUE) return rc; @@ -1866,15 +1850,14 @@ static int emulate_load_segment(struct x86_emulate_ctxt *ctxt, return rc; } -static inline void +static void setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct desc_struct *cs, - struct desc_struct *ss) + struct desc_struct *cs, struct desc_struct *ss) { u16 selector; memset(cs, 0, sizeof(struct desc_struct)); - ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); + ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS); memset(ss, 0, sizeof(struct desc_struct)); cs->l = 0; /* will be adjusted later */ @@ -1897,10 +1880,10 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt, ss->p = 1; } -static int -emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) +static int emulate_syscall(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; @@ -1912,7 +1895,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) return emulate_ud(ctxt); ops->get_msr(ctxt, MSR_EFER, &efer); - setup_syscalls_segments(ctxt, ops, &cs, &ss); + setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_STAR, &msr_data); msr_data >>= 32; @@ -1950,16 +1933,16 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) return X86EMUL_CONTINUE; } -static int -emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) +static int emulate_sysenter(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; u16 cs_sel, ss_sel; u64 efer = 0; - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); + ops->get_msr(ctxt, MSR_EFER, &efer); /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) return emulate_gp(ctxt, 0); @@ -1970,7 +1953,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) if (ctxt->mode == X86EMUL_MODE_PROT64) return emulate_ud(ctxt); - setup_syscalls_segments(ctxt, ops, &cs, &ss); + setup_syscalls_segments(ctxt, &cs, &ss); ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); switch (ctxt->mode) { @@ -2006,10 +1989,10 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) return X86EMUL_CONTINUE; } -static int -emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) +static int emulate_sysexit(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; + struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct cs, ss; u64 msr_data; int usermode; @@ -2020,7 +2003,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ctxt->mode == X86EMUL_MODE_VM86) return emulate_gp(ctxt, 0); - setup_syscalls_segments(ctxt, ops, &cs, &ss); + setup_syscalls_segments(ctxt, &cs, &ss); if ((c->rex_prefix & 0x8) != 0x0) usermode = X86EMUL_MODE_PROT64; @@ -2058,8 +2041,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) return X86EMUL_CONTINUE; } -static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops) +static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) { int iopl; if (ctxt->mode == X86EMUL_MODE_REAL) @@ -2067,13 +2049,13 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return ops->cpl(ctxt) > iopl; + return ctxt->ops->cpl(ctxt) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 port, u16 len) { + struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct tr_seg; u32 base3; int r; @@ -2104,14 +2086,13 @@ static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, } static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 port, u16 len) { if (ctxt->perm_ok) return true; - if (emulator_bad_iopl(ctxt, ops)) - if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) + if (emulator_bad_iopl(ctxt)) + if (!emulator_io_port_access_allowed(ctxt, port, len)) return false; ctxt->perm_ok = true; @@ -2120,7 +2101,6 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, } static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct tss_segment_16 *tss) { struct decode_cache *c = &ctxt->decode; @@ -2144,7 +2124,6 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, } static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct tss_segment_16 *tss) { struct decode_cache *c = &ctxt->decode; @@ -2175,19 +2154,19 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, * Now load segment descriptors. If fault happenes at this stage * it is handled in a context of new task */ - ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); + ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); + ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); + ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); + ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); + ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); if (ret != X86EMUL_CONTINUE) return ret; @@ -2195,10 +2174,10 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, } static int task_switch_16(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { + struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_16 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2209,7 +2188,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, /* FIXME: need to provide precise fault address */ return ret; - save_state_to_tss16(ctxt, ops, &tss_seg); + save_state_to_tss16(ctxt, &tss_seg); ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); @@ -2235,16 +2214,15 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt, return ret; } - return load_state_from_tss16(ctxt, ops, &tss_seg); + return load_state_from_tss16(ctxt, &tss_seg); } static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct tss_segment_32 *tss) { struct decode_cache *c = &ctxt->decode; - tss->cr3 = ops->get_cr(ctxt, 3); + tss->cr3 = ctxt->ops->get_cr(ctxt, 3); tss->eip = c->eip; tss->eflags = ctxt->eflags; tss->eax = c->regs[VCPU_REGS_RAX]; @@ -2266,13 +2244,12 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, } static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, struct tss_segment_32 *tss) { struct decode_cache *c = &ctxt->decode; int ret; - if (ops->set_cr(ctxt, 3, tss->cr3)) + if (ctxt->ops->set_cr(ctxt, 3, tss->cr3)) return emulate_gp(ctxt, 0); c->eip = tss->eip; ctxt->eflags = tss->eflags | 2; @@ -2301,25 +2278,25 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, * Now load segment descriptors. If fault happenes at this stage * it is handled in a context of new task */ - ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); + ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); + ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); + ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); + ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); + ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); + ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS); if (ret != X86EMUL_CONTINUE) return ret; - ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); + ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS); if (ret != X86EMUL_CONTINUE) return ret; @@ -2327,10 +2304,10 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, } static int task_switch_32(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 tss_selector, u16 old_tss_sel, ulong old_tss_base, struct desc_struct *new_desc) { + struct x86_emulate_ops *ops = ctxt->ops; struct tss_segment_32 tss_seg; int ret; u32 new_tss_base = get_desc_base(new_desc); @@ -2341,7 +2318,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, /* FIXME: need to provide precise fault address */ return ret; - save_state_to_tss32(ctxt, ops, &tss_seg); + save_state_to_tss32(ctxt, &tss_seg); ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg, &ctxt->exception); @@ -2367,14 +2344,14 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt, return ret; } - return load_state_from_tss32(ctxt, ops, &tss_seg); + return load_state_from_tss32(ctxt, &tss_seg); } static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, u16 tss_selector, int reason, bool has_error_code, u32 error_code) { + struct x86_emulate_ops *ops = ctxt->ops; struct desc_struct curr_tss_desc, next_tss_desc; int ret; u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR); @@ -2384,10 +2361,10 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, /* FIXME: old_tss_base == ~0 ? */ - ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); + ret = read_segment_descriptor(ctxt, tss_selector, &next_tss_desc); if (ret != X86EMUL_CONTINUE) return ret; - ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); + ret = read_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); if (ret != X86EMUL_CONTINUE) return ret; @@ -2409,8 +2386,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ - write_segment_descriptor(ctxt, ops, old_tss_sel, - &curr_tss_desc); + write_segment_descriptor(ctxt, old_tss_sel, &curr_tss_desc); } if (reason == TASK_SWITCH_IRET) @@ -2422,10 +2398,10 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, old_tss_sel = 0xffff; if (next_tss_desc.type & 8) - ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, + ret = task_switch_32(ctxt, tss_selector, old_tss_sel, old_tss_base, &next_tss_desc); else - ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, + ret = task_switch_16(ctxt, tss_selector, old_tss_sel, old_tss_base, &next_tss_desc); if (ret != X86EMUL_CONTINUE) return ret; @@ -2435,8 +2411,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, if (reason != TASK_SWITCH_IRET) { next_tss_desc.type |= (1 << 1); /* set busy flag */ - write_segment_descriptor(ctxt, ops, tss_selector, - &next_tss_desc); + write_segment_descriptor(ctxt, tss_selector, &next_tss_desc); } ops->set_cr(ctxt, 0, ops->get_cr(ctxt, 0) | X86_CR0_TS); @@ -2458,14 +2433,13 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, u16 tss_selector, int reason, bool has_error_code, u32 error_code) { - struct x86_emulate_ops *ops = ctxt->ops; struct decode_cache *c = &ctxt->decode; int rc; c->eip = ctxt->eip; c->dst.type = OP_NONE; - rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, + rc = emulator_do_task_switch(ctxt, tss_selector, reason, has_error_code, error_code); if (rc == X86EMUL_CONTINUE) @@ -2535,7 +2509,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) old_eip = c->eip; memcpy(&sel, c->src.valptr + c->op_bytes, 2); - if (load_segment_descriptor(ctxt, ctxt->ops, sel, VCPU_SREG_CS)) + if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS)) return X86EMUL_CONTINUE; c->eip = 0; @@ -2973,7 +2947,7 @@ static int check_perm_in(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; c->dst.bytes = min(c->dst.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->ops, c->src.val, c->dst.bytes)) + if (!emulator_io_permited(ctxt, c->src.val, c->dst.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -2984,7 +2958,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) struct decode_cache *c = &ctxt->decode; c->src.bytes = min(c->src.bytes, 4u); - if (!emulator_io_permited(ctxt, ctxt->ops, c->dst.val, c->src.bytes)) + if (!emulator_io_permited(ctxt, c->dst.val, c->src.bytes)) return emulate_gp(ctxt, 0); return X86EMUL_CONTINUE; @@ -3724,8 +3698,7 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) return false; } -int -x86_emulate_insn(struct x86_emulate_ctxt *ctxt) +int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { struct x86_emulate_ops *ops = ctxt->ops; u64 msr_data; @@ -3854,25 +3827,25 @@ special_insn: switch (c->b) { case 0x06: /* push es */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_push_sreg(ctxt, VCPU_SREG_ES); break; case 0x07: /* pop es */ - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); + rc = emulate_pop_sreg(ctxt, VCPU_SREG_ES); break; case 0x0e: /* push cs */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_CS); + rc = emulate_push_sreg(ctxt, VCPU_SREG_CS); break; case 0x16: /* push ss */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_push_sreg(ctxt, VCPU_SREG_SS); break; case 0x17: /* pop ss */ - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); + rc = emulate_pop_sreg(ctxt, VCPU_SREG_SS); break; case 0x1e: /* push ds */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_push_sreg(ctxt, VCPU_SREG_DS); break; case 0x1f: /* pop ds */ - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); + rc = emulate_pop_sreg(ctxt, VCPU_SREG_DS); break; case 0x40 ... 0x47: /* inc r16/r32 */ emulate_1op("inc", c->dst, ctxt->eflags); @@ -3938,7 +3911,7 @@ special_insn: if (c->modrm_reg == VCPU_SREG_SS) ctxt->interruptibility = KVM_X86_SHADOW_INT_MOV_SS; - rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); + rc = load_segment_descriptor(ctxt, sel, c->modrm_reg); c->dst.type = OP_NONE; /* Disable writeback. */ break; @@ -3969,13 +3942,13 @@ special_insn: rc = em_pop(ctxt); break; case 0xc4: /* les */ - rc = emulate_load_segment(ctxt, ops, VCPU_SREG_ES); + rc = emulate_load_segment(ctxt, VCPU_SREG_ES); break; case 0xc5: /* lds */ - rc = emulate_load_segment(ctxt, ops, VCPU_SREG_DS); + rc = emulate_load_segment(ctxt, VCPU_SREG_DS); break; case 0xcb: /* ret far */ - rc = emulate_ret_far(ctxt, ops); + rc = emulate_ret_far(ctxt); break; case 0xcc: /* int3 */ irq = 3; @@ -3983,7 +3956,7 @@ special_insn: case 0xcd: /* int n */ irq = c->src.val; do_interrupt: - rc = emulate_int(ctxt, ops, irq); + rc = emulate_int(ctxt, irq); break; case 0xce: /* into */ if (ctxt->eflags & EFLG_OF) { @@ -3992,7 +3965,7 @@ special_insn: } break; case 0xcf: /* iret */ - rc = emulate_iret(ctxt, ops); + rc = emulate_iret(ctxt); break; case 0xd0 ... 0xd1: /* Grp2 */ rc = em_grp2(ctxt); @@ -4037,7 +4010,7 @@ special_insn: case 0xec: /* in al,dx */ case 0xed: /* in (e/r)ax,dx */ do_io_in: - if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, + if (!pio_in_emulated(ctxt, c->dst.bytes, c->src.val, &c->dst.val)) goto done; /* IO is needed */ break; @@ -4065,14 +4038,14 @@ special_insn: ctxt->eflags |= EFLG_CF; break; case 0xfa: /* cli */ - if (emulator_bad_iopl(ctxt, ops)) { + if (emulator_bad_iopl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } else ctxt->eflags &= ~X86_EFLAGS_IF; break; case 0xfb: /* sti */ - if (emulator_bad_iopl(ctxt, ops)) { + if (emulator_bad_iopl(ctxt)) { rc = emulate_gp(ctxt, 0); goto done; } else { @@ -4154,7 +4127,7 @@ done: twobyte_insn: switch (c->b) { case 0x05: /* syscall */ - rc = emulate_syscall(ctxt, ops); + rc = emulate_syscall(ctxt); break; case 0x06: rc = em_clts(ctxt); @@ -4216,10 +4189,10 @@ twobyte_insn: rc = X86EMUL_CONTINUE; break; case 0x34: /* sysenter */ - rc = emulate_sysenter(ctxt, ops); + rc = emulate_sysenter(ctxt); break; case 0x35: /* sysexit */ - rc = emulate_sysexit(ctxt, ops); + rc = emulate_sysexit(ctxt); break; case 0x40 ... 0x4f: /* cmov */ c->dst.val = c->dst.orig_val = c->src.val; @@ -4234,10 +4207,10 @@ twobyte_insn: c->dst.val = test_cc(c->b, ctxt->eflags); break; case 0xa0: /* push fs */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_push_sreg(ctxt, VCPU_SREG_FS); break; case 0xa1: /* pop fs */ - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); + rc = emulate_pop_sreg(ctxt, VCPU_SREG_FS); break; case 0xa3: bt: /* bt */ @@ -4251,10 +4224,10 @@ twobyte_insn: emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); break; case 0xa8: /* push gs */ - rc = emulate_push_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_push_sreg(ctxt, VCPU_SREG_GS); break; case 0xa9: /* pop gs */ - rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); + rc = emulate_pop_sreg(ctxt, VCPU_SREG_GS); break; case 0xab: bts: /* bts */ @@ -4284,17 +4257,17 @@ twobyte_insn: } break; case 0xb2: /* lss */ - rc = emulate_load_segment(ctxt, ops, VCPU_SREG_SS); + rc = emulate_load_segment(ctxt, VCPU_SREG_SS); break; case 0xb3: btr: /* btr */ emulate_2op_SrcV_nobyte("btr", c->src, c->dst, ctxt->eflags); break; case 0xb4: /* lfs */ - rc = emulate_load_segment(ctxt, ops, VCPU_SREG_FS); + rc = emulate_load_segment(ctxt, VCPU_SREG_FS); break; case 0xb5: /* lgs */ - rc = emulate_load_segment(ctxt, ops, VCPU_SREG_GS); + rc = emulate_load_segment(ctxt, VCPU_SREG_GS); break; case 0xb6 ... 0xb7: /* movzx */ c->dst.bytes = c->op_bytes; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 77c9d8673dc4..51df0b6c891a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4513,7 +4513,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) vcpu->arch.emulate_ctxt.decode.ad_bytes = 2; vcpu->arch.emulate_ctxt.decode.eip = vcpu->arch.emulate_ctxt.eip + inc_eip; - ret = emulate_int_real(&vcpu->arch.emulate_ctxt, &emulate_ops, irq); + ret = emulate_int_real(&vcpu->arch.emulate_ctxt, irq); if (ret != X86EMUL_CONTINUE) return EMULATE_FAIL; -- cgit v1.2.3 From 5e520e62787afd8fc28626fd8d4f77491135119d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 May 2011 10:13:12 -0400 Subject: KVM: VMX: Move VMREAD cleanup to exception handler We clean up a failed VMREAD by clearing the output register. Do it in the exception handler instead of unconditionally. This is worthwhile since there are more than a hundred call sites. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 6 +++++- arch/x86/kvm/vmx.c | 8 +++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d2ac8e2ee897..db4b6543b830 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -830,11 +830,12 @@ enum { asmlinkage void kvm_spurious_fault(void); extern bool kvm_rebooting; -#define __kvm_handle_fault_on_reboot(insn) \ +#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \ "666: " insn "\n\t" \ "668: \n\t" \ ".pushsection .fixup, \"ax\" \n" \ "667: \n\t" \ + cleanup_insn "\n\t" \ "cmpb $0, kvm_rebooting \n\t" \ "jne 668b \n\t" \ __ASM_SIZE(push) " $666b \n\t" \ @@ -844,6 +845,9 @@ extern bool kvm_rebooting; _ASM_PTR " 666b, 667b \n\t" \ ".popsection" +#define __kvm_handle_fault_on_reboot(insn) \ + ____kvm_handle_fault_on_reboot(insn, "") + #define KVM_ARCH_WANT_MMU_NOTIFIER int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d48ec60ea421..7d22d6c6734d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -43,6 +43,8 @@ #include "trace.h" #define __ex(x) __kvm_handle_fault_on_reboot(x) +#define __ex_clear(x, reg) \ + ____kvm_handle_fault_on_reboot(x, "xor " reg " , " reg) MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -587,10 +589,10 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) static unsigned long vmcs_readl(unsigned long field) { - unsigned long value = 0; + unsigned long value; - asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) - : "+a"(value) : "d"(field) : "cc"); + asm volatile (__ex_clear(ASM_VMX_VMREAD_RDX_RAX, "%0") + : "=a"(value) : "d"(field) : "cc"); return value; } -- cgit v1.2.3 From 96304217a783a65c0923a26f54141cfe7a2a71b5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 15 May 2011 10:13:13 -0400 Subject: KVM: VMX: always_inline VMREADs vmcs_readl() and friends are really short, but gcc thinks they are long because of the out-of-line exception handlers. Mark them always_inline to clear the misunderstanding. Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7d22d6c6734d..3365e5dd3360 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -587,7 +587,7 @@ static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) } } -static unsigned long vmcs_readl(unsigned long field) +static __always_inline unsigned long vmcs_readl(unsigned long field) { unsigned long value; @@ -596,17 +596,17 @@ static unsigned long vmcs_readl(unsigned long field) return value; } -static u16 vmcs_read16(unsigned long field) +static __always_inline u16 vmcs_read16(unsigned long field) { return vmcs_readl(field); } -static u32 vmcs_read32(unsigned long field) +static __always_inline u32 vmcs_read32(unsigned long field) { return vmcs_readl(field); } -static u64 vmcs_read64(unsigned long field) +static __always_inline u64 vmcs_read64(unsigned long field) { #ifdef CONFIG_X86_64 return vmcs_readl(field); -- cgit v1.2.3 From 332b207d65c1d7982489dbb83e5071c95e19eb75 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 15 May 2011 23:20:27 +0800 Subject: KVM: MMU: optimize pte write path if don't have protected sp Simply return from kvm_mmu_pte_write path if no shadow page is write-protected, then we can avoid to walk all shadow pages and hold mmu-lock Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index db4b6543b830..387780eb97bb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -441,6 +441,7 @@ struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_max_mmu_pages; + unsigned int indirect_shadow_pages; atomic_t invlpg_counter; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aee38623b768..b4ae7afa6b3b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -498,6 +498,7 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) linfo = lpage_info_slot(gfn, slot, i); linfo->write_count += 1; } + kvm->arch.indirect_shadow_pages++; } static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) @@ -513,6 +514,7 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) linfo->write_count -= 1; WARN_ON(linfo->write_count < 0); } + kvm->arch.indirect_shadow_pages--; } static int has_wrprotected_page(struct kvm *kvm, @@ -3233,6 +3235,13 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int level, npte, invlpg_counter, r, flooded = 0; bool remote_flush, local_flush, zap_page; + /* + * If we don't have indirect shadow pages, it means no page is + * write-protected, so we can exit simply. + */ + if (!ACCESS_ONCE(vcpu->kvm->arch.indirect_shadow_pages)) + return; + zap_page = remote_flush = local_flush = false; offset = offset_in_page(gpa); -- cgit v1.2.3 From 8b0cedff040b652f3d36b1368778667581b0c140 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 15 May 2011 23:22:04 +0800 Subject: KVM: use __copy_to_user/__clear_user to write guest page Simply use __copy_to_user/__clear_user to write guest page since we have already verified the user address when the memslot is set Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51df0b6c891a..0b089860e950 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1388,7 +1388,7 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) return 1; kvm_x86_ops->patch_hypercall(vcpu, instructions); ((unsigned char *)instructions)[3] = 0xc3; /* ret */ - if (copy_to_user((void __user *)addr, instructions, 4)) + if (__copy_to_user((void __user *)addr, instructions, 4)) return 1; kvm->arch.hv_hypercall = data; break; @@ -1415,7 +1415,7 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); if (kvm_is_error_hva(addr)) return 1; - if (clear_user((void __user *)addr, PAGE_SIZE)) + if (__clear_user((void __user *)addr, PAGE_SIZE)) return 1; vcpu->arch.hv_vapic = data; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 96ebc0679415..087b3f8ca46e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1345,7 +1345,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return -EFAULT; - r = copy_to_user((void __user *)addr + offset, data, len); + r = __copy_to_user((void __user *)addr + offset, data, len); if (r) return -EFAULT; mark_page_dirty(kvm, gfn); @@ -1405,7 +1405,7 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, if (kvm_is_error_hva(ghc->hva)) return -EFAULT; - r = copy_to_user((void __user *)ghc->hva, data, len); + r = __copy_to_user((void __user *)ghc->hva, data, len); if (r) return -EFAULT; mark_page_dirty_in_slot(kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT); -- cgit v1.2.3 From 1249b96e72533ffdb2fa25b5d7471918b065ccc8 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 15 May 2011 23:25:10 +0800 Subject: KVM: fix uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix: warning: ‘cs_sel’ may be used uninitialized in this function warning: ‘ss_sel’ may be used uninitialized in this function Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d3f4466cd19c..bc916bd22e86 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1996,7 +1996,7 @@ static int emulate_sysexit(struct x86_emulate_ctxt *ctxt) struct desc_struct cs, ss; u64 msr_data; int usermode; - u16 cs_sel, ss_sel; + u16 cs_sel = 0, ss_sel = 0; /* inject #GP if in real mode or Virtual 8086 mode */ if (ctxt->mode == X86EMUL_MODE_REAL || -- cgit v1.2.3 From 53c07b18787d564a105e1aa678795d67eeb27447 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 15 May 2011 23:26:20 +0800 Subject: KVM: MMU: abstract the operation of rmap Abstract the operation of rmap to spte_list, then we can use it for the reverse mapping of parent pte in the later patch Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 260 +++++++++++++++++++++------------------- 2 files changed, 140 insertions(+), 122 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 387780eb97bb..e6a4a57e142b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -347,7 +347,7 @@ struct kvm_vcpu_arch { struct kvm_pv_mmu_op_buffer mmu_op_buffer; struct kvm_mmu_memory_cache mmu_pte_chain_cache; - struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; struct kvm_mmu_memory_cache mmu_page_cache; struct kvm_mmu_memory_cache mmu_page_header_cache; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b4ae7afa6b3b..a6811cbdbf0d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -148,7 +148,7 @@ module_param(oos_shadow, bool, 0644); #define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \ | PT64_NX_MASK) -#define RMAP_EXT 4 +#define PTE_LIST_EXT 4 #define ACC_EXEC_MASK 1 #define ACC_WRITE_MASK PT_WRITABLE_MASK @@ -164,9 +164,9 @@ module_param(oos_shadow, bool, 0644); #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) -struct kvm_rmap_desc { - u64 *sptes[RMAP_EXT]; - struct kvm_rmap_desc *more; +struct pte_list_desc { + u64 *sptes[PTE_LIST_EXT]; + struct pte_list_desc *more; }; struct kvm_shadow_walk_iterator { @@ -185,7 +185,7 @@ struct kvm_shadow_walk_iterator { typedef void (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp, u64 *spte); static struct kmem_cache *pte_chain_cache; -static struct kmem_cache *rmap_desc_cache; +static struct kmem_cache *pte_list_desc_cache; static struct kmem_cache *mmu_page_header_cache; static struct percpu_counter kvm_total_used_mmu_pages; @@ -401,8 +401,8 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) pte_chain_cache, 4); if (r) goto out; - r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 4 + PTE_PREFETCH_NUM); + r = mmu_topup_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, + pte_list_desc_cache, 4 + PTE_PREFETCH_NUM); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); @@ -416,8 +416,10 @@ out: static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { - mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, pte_chain_cache); - mmu_free_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, rmap_desc_cache); + mmu_free_memory_cache(&vcpu->arch.mmu_pte_chain_cache, + pte_chain_cache); + mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache, + pte_list_desc_cache); mmu_free_memory_cache_page(&vcpu->arch.mmu_page_cache); mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache, mmu_page_header_cache); @@ -444,15 +446,15 @@ static void mmu_free_pte_chain(struct kvm_pte_chain *pc) kmem_cache_free(pte_chain_cache, pc); } -static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) +static struct pte_list_desc *mmu_alloc_pte_list_desc(struct kvm_vcpu *vcpu) { - return mmu_memory_cache_alloc(&vcpu->arch.mmu_rmap_desc_cache, - sizeof(struct kvm_rmap_desc)); + return mmu_memory_cache_alloc(&vcpu->arch.mmu_pte_list_desc_cache, + sizeof(struct pte_list_desc)); } -static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd) +static void mmu_free_pte_list_desc(struct pte_list_desc *pte_list_desc) { - kmem_cache_free(rmap_desc_cache, rd); + kmem_cache_free(pte_list_desc_cache, pte_list_desc); } static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index) @@ -590,67 +592,42 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) } /* - * Take gfn and return the reverse mapping to it. - */ - -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) -{ - struct kvm_memory_slot *slot; - struct kvm_lpage_info *linfo; - - slot = gfn_to_memslot(kvm, gfn); - if (likely(level == PT_PAGE_TABLE_LEVEL)) - return &slot->rmap[gfn - slot->base_gfn]; - - linfo = lpage_info_slot(gfn, slot, level); - - return &linfo->rmap_pde; -} - -/* - * Reverse mapping data structures: + * Pte mapping structures: * - * If rmapp bit zero is zero, then rmapp point to the shadw page table entry - * that points to page_address(page). + * If pte_list bit zero is zero, then pte_list point to the spte. * - * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc - * containing more mappings. + * If pte_list bit zero is one, (then pte_list & ~1) points to a struct + * pte_list_desc containing more mappings. * - * Returns the number of rmap entries before the spte was added or zero if + * Returns the number of pte entries before the spte was added or zero if * the spte was not added. * */ -static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) +static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, + unsigned long *pte_list) { - struct kvm_mmu_page *sp; - struct kvm_rmap_desc *desc; - unsigned long *rmapp; + struct pte_list_desc *desc; int i, count = 0; - if (!is_rmap_spte(*spte)) - return count; - sp = page_header(__pa(spte)); - kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); - if (!*rmapp) { - rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte); - *rmapp = (unsigned long)spte; - } else if (!(*rmapp & 1)) { - rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte); - desc = mmu_alloc_rmap_desc(vcpu); - desc->sptes[0] = (u64 *)*rmapp; + if (!*pte_list) { + rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); + *pte_list = (unsigned long)spte; + } else if (!(*pte_list & 1)) { + rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); + desc = mmu_alloc_pte_list_desc(vcpu); + desc->sptes[0] = (u64 *)*pte_list; desc->sptes[1] = spte; - *rmapp = (unsigned long)desc | 1; + *pte_list = (unsigned long)desc | 1; ++count; } else { - rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); - desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - while (desc->sptes[RMAP_EXT-1] && desc->more) { + rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); + desc = (struct pte_list_desc *)(*pte_list & ~1ul); + while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { desc = desc->more; - count += RMAP_EXT; + count += PTE_LIST_EXT; } - if (desc->sptes[RMAP_EXT-1]) { - desc->more = mmu_alloc_rmap_desc(vcpu); + if (desc->sptes[PTE_LIST_EXT-1]) { + desc->more = mmu_alloc_pte_list_desc(vcpu); desc = desc->more; } for (i = 0; desc->sptes[i]; ++i) @@ -660,59 +637,78 @@ static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) return count; } -static void rmap_desc_remove_entry(unsigned long *rmapp, - struct kvm_rmap_desc *desc, - int i, - struct kvm_rmap_desc *prev_desc) +static u64 *pte_list_next(unsigned long *pte_list, u64 *spte) +{ + struct pte_list_desc *desc; + u64 *prev_spte; + int i; + + if (!*pte_list) + return NULL; + else if (!(*pte_list & 1)) { + if (!spte) + return (u64 *)*pte_list; + return NULL; + } + desc = (struct pte_list_desc *)(*pte_list & ~1ul); + prev_spte = NULL; + while (desc) { + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { + if (prev_spte == spte) + return desc->sptes[i]; + prev_spte = desc->sptes[i]; + } + desc = desc->more; + } + return NULL; +} + +static void +pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, + int i, struct pte_list_desc *prev_desc) { int j; - for (j = RMAP_EXT - 1; !desc->sptes[j] && j > i; --j) + for (j = PTE_LIST_EXT - 1; !desc->sptes[j] && j > i; --j) ; desc->sptes[i] = desc->sptes[j]; desc->sptes[j] = NULL; if (j != 0) return; if (!prev_desc && !desc->more) - *rmapp = (unsigned long)desc->sptes[0]; + *pte_list = (unsigned long)desc->sptes[0]; else if (prev_desc) prev_desc->more = desc->more; else - *rmapp = (unsigned long)desc->more | 1; - mmu_free_rmap_desc(desc); + *pte_list = (unsigned long)desc->more | 1; + mmu_free_pte_list_desc(desc); } -static void rmap_remove(struct kvm *kvm, u64 *spte) +static void pte_list_remove(u64 *spte, unsigned long *pte_list) { - struct kvm_rmap_desc *desc; - struct kvm_rmap_desc *prev_desc; - struct kvm_mmu_page *sp; - gfn_t gfn; - unsigned long *rmapp; + struct pte_list_desc *desc; + struct pte_list_desc *prev_desc; int i; - sp = page_header(__pa(spte)); - gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); - if (!*rmapp) { - printk(KERN_ERR "rmap_remove: %p 0->BUG\n", spte); + if (!*pte_list) { + printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); BUG(); - } else if (!(*rmapp & 1)) { - rmap_printk("rmap_remove: %p 1->0\n", spte); - if ((u64 *)*rmapp != spte) { - printk(KERN_ERR "rmap_remove: %p 1->BUG\n", spte); + } else if (!(*pte_list & 1)) { + rmap_printk("pte_list_remove: %p 1->0\n", spte); + if ((u64 *)*pte_list != spte) { + printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); BUG(); } - *rmapp = 0; + *pte_list = 0; } else { - rmap_printk("rmap_remove: %p many->many\n", spte); - desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); + rmap_printk("pte_list_remove: %p many->many\n", spte); + desc = (struct pte_list_desc *)(*pte_list & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) if (desc->sptes[i] == spte) { - rmap_desc_remove_entry(rmapp, + pte_list_desc_remove_entry(pte_list, desc, i, prev_desc); return; @@ -720,11 +716,59 @@ static void rmap_remove(struct kvm *kvm, u64 *spte) prev_desc = desc; desc = desc->more; } - pr_err("rmap_remove: %p many->many\n", spte); + pr_err("pte_list_remove: %p many->many\n", spte); BUG(); } } +/* + * Take gfn and return the reverse mapping to it. + */ +static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int level) +{ + struct kvm_memory_slot *slot; + struct kvm_lpage_info *linfo; + + slot = gfn_to_memslot(kvm, gfn); + if (likely(level == PT_PAGE_TABLE_LEVEL)) + return &slot->rmap[gfn - slot->base_gfn]; + + linfo = lpage_info_slot(gfn, slot, level); + + return &linfo->rmap_pde; +} + +static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) +{ + struct kvm_mmu_page *sp; + unsigned long *rmapp; + + if (!is_rmap_spte(*spte)) + return 0; + + sp = page_header(__pa(spte)); + kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); + rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level); + return pte_list_add(vcpu, spte, rmapp); +} + +static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) +{ + return pte_list_next(rmapp, spte); +} + +static void rmap_remove(struct kvm *kvm, u64 *spte) +{ + struct kvm_mmu_page *sp; + gfn_t gfn; + unsigned long *rmapp; + + sp = page_header(__pa(spte)); + gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); + rmapp = gfn_to_rmap(kvm, gfn, sp->role.level); + pte_list_remove(spte, rmapp); +} + static int set_spte_track_bits(u64 *sptep, u64 new_spte) { pfn_t pfn; @@ -752,32 +796,6 @@ static void drop_spte(struct kvm *kvm, u64 *sptep, u64 new_spte) rmap_remove(kvm, sptep); } -static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) -{ - struct kvm_rmap_desc *desc; - u64 *prev_spte; - int i; - - if (!*rmapp) - return NULL; - else if (!(*rmapp & 1)) { - if (!spte) - return (u64 *)*rmapp; - return NULL; - } - desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); - prev_spte = NULL; - while (desc) { - for (i = 0; i < RMAP_EXT && desc->sptes[i]; ++i) { - if (prev_spte == spte) - return desc->sptes[i]; - prev_spte = desc->sptes[i]; - } - desc = desc->more; - } - return NULL; -} - static int rmap_write_protect(struct kvm *kvm, u64 gfn) { unsigned long *rmapp; @@ -3601,8 +3619,8 @@ static void mmu_destroy_caches(void) { if (pte_chain_cache) kmem_cache_destroy(pte_chain_cache); - if (rmap_desc_cache) - kmem_cache_destroy(rmap_desc_cache); + if (pte_list_desc_cache) + kmem_cache_destroy(pte_list_desc_cache); if (mmu_page_header_cache) kmem_cache_destroy(mmu_page_header_cache); } @@ -3614,10 +3632,10 @@ int kvm_mmu_module_init(void) 0, 0, NULL); if (!pte_chain_cache) goto nomem; - rmap_desc_cache = kmem_cache_create("kvm_rmap_desc", - sizeof(struct kvm_rmap_desc), + pte_list_desc_cache = kmem_cache_create("pte_list_desc", + sizeof(struct pte_list_desc), 0, 0, NULL); - if (!rmap_desc_cache) + if (!pte_list_desc_cache) goto nomem; mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header", -- cgit v1.2.3 From 67052b3508f09956427d6476fd35e8fddde6c618 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 15 May 2011 23:27:08 +0800 Subject: KVM: MMU: remove the arithmetic of parent pte rmap Parent pte rmap and page rmap are very similar, so use the same arithmetic for them Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 7 +- arch/x86/kvm/mmu.c | 189 ++++++++++------------------------------ 2 files changed, 46 insertions(+), 150 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e6a4a57e142b..ff17deb6e98b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -227,14 +227,10 @@ struct kvm_mmu_page { * in this shadow page. */ DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); - bool multimapped; /* More than one parent_pte? */ bool unsync; int root_count; /* Current