author     Linus Torvalds <torvalds@linux-foundation.org>  2019-05-17 10:33:30 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2019-05-17 10:33:30 -0700
commit     0ef0fd351550130129bbdb77362488befd7b69d2 (patch)
tree       23186172f5f85c06e18e3ee1a9619879df03c5df /arch/x86
parent     4489da7183099f569a7d3dd819c975073c04bc72 (diff)
parent     c011d23ba046826ccf8c4a4a6c1d01c9ccaa1403 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:

 "ARM:
   - support for SVE and Pointer Authentication in guests
   - PMU improvements

  POWER:
   - support for direct access to the POWER9 XIVE interrupt controller
   - memory and performance optimizations

  x86:
   - support for accessing memory not backed by struct page
   - fixes and refactoring

  Generic:
   - dirty page tracking improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (155 commits)
  kvm: fix compilation on aarch64
  Revert "KVM: nVMX: Expose RDPMC-exiting only when guest supports PMU"
  kvm: x86: Fix L1TF mitigation for shadow MMU
  KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible
  KVM: PPC: Book3S: Remove useless checks in 'release' method of KVM device
  KVM: PPC: Book3S HV: XIVE: Fix spelling mistake "acessing" -> "accessing"
  KVM: PPC: Book3S HV: Make sure to load LPID for radix VCPUs
  kvm: nVMX: Set nested_run_pending in vmx_set_nested_state after checks complete
  tests: kvm: Add tests for KVM_SET_NESTED_STATE
  KVM: nVMX: KVM_SET_NESTED_STATE - Tear down old EVMCS state before setting new state
  tests: kvm: Add tests for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_CPU_ID
  tests: kvm: Add tests to .gitignore
  KVM: Introduce KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2
  KVM: Fix kvm_clear_dirty_log_protect off-by-(minus-)one
  KVM: Fix the bitmap range to copy during clear dirty
  KVM: arm64: Fix ptrauth ID register masking logic
  KVM: x86: use direct accessors for RIP and RSP
  KVM: VMX: Use accessors for GPRs outside of dedicated caching logic
  KVM: x86: Omit caching logic for always-available GPRs
  kvm, x86: Properly check whether a pfn is an MMIO or not
  ...
Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/events/intel/core.c      |    6
-rw-r--r--  arch/x86/include/asm/e820/api.h   |    1
-rw-r--r--  arch/x86/include/asm/kvm_host.h   |    7
-rw-r--r--  arch/x86/include/asm/msr-index.h  |    8
-rw-r--r--  arch/x86/kernel/e820.c            |   18
-rw-r--r--  arch/x86/kvm/cpuid.c              |   12
-rw-r--r--  arch/x86/kvm/hyperv.c             |   24
-rw-r--r--  arch/x86/kvm/kvm_cache_regs.h     |   42
-rw-r--r--  arch/x86/kvm/lapic.c              |   38
-rw-r--r--  arch/x86/kvm/mmu.c                |   23
-rw-r--r--  arch/x86/kvm/mtrr.c               |   10
-rw-r--r--  arch/x86/kvm/paging_tmpl.h        |   38
-rw-r--r--  arch/x86/kvm/svm.c                |  128
-rw-r--r--  arch/x86/kvm/vmx/capabilities.h   |    2
-rw-r--r--  arch/x86/kvm/vmx/nested.c         |  348
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c      |    8
-rw-r--r--  arch/x86/kvm/vmx/vmx.c            |   90
-rw-r--r--  arch/x86/kvm/vmx/vmx.h            |   11
-rw-r--r--  arch/x86/kvm/x86.c                |  199
-rw-r--r--  arch/x86/kvm/x86.h                |   10
20 files changed, 568 insertions, 455 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 12ec402f4114..546d13e436aa 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2384,7 +2384,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
*/
if (__test_and_clear_bit(55, (unsigned long *)&status)) {
handled++;
- intel_pt_interrupt();
+ if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
+ perf_guest_cbs->handle_intel_pt_intr))
+ perf_guest_cbs->handle_intel_pt_intr();
+ else
+ intel_pt_interrupt();
}
/*
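The hunk above hands the Intel PT ToPA PMI to a registered guest callback instead of the host PT driver. A minimal sketch of how a hypervisor module could wire that up; only .is_in_guest and .handle_intel_pt_intr are taken from the hunk, the helper names and the use of perf_register_guest_info_callbacks() are assumptions for illustration:

        #include <linux/perf_event.h>

        static bool my_is_in_guest(void)
        {
                /* Illustrative: report whether a vCPU is currently running on
                 * this CPU, e.g. from a per-CPU flag set around VM-entry/exit. */
                return false;
        }

        static void my_handle_intel_pt_intr(void)
        {
                /* Illustrative: route the PT ToPA PMI to the guest's virtual
                 * PMU instead of the host Intel PT handler. */
        }

        static struct perf_guest_info_callbacks my_guest_cbs = {
                .is_in_guest          = my_is_in_guest,
                .handle_intel_pt_intr = my_handle_intel_pt_intr,
        };

        static int __init my_module_init(void)
        {
                return perf_register_guest_info_callbacks(&my_guest_cbs);
        }
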
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 62be73b23d5c..e8f58ddd06d9 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -10,6 +10,7 @@ extern struct e820_table *e820_table_firmware;
extern unsigned long pci_mem_start;
+extern bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_any(u64 start, u64 end, enum e820_type type);
extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c79abe7ca093..450d69a1e6fa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -470,6 +470,7 @@ struct kvm_pmu {
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
+ u64 global_ovf_ctrl_mask;
u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
@@ -781,6 +782,9 @@ struct kvm_vcpu_arch {
/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
bool l1tf_flush_l1d;
+
+ /* AMD MSRC001_0015 Hardware Configuration */
+ u64 msr_hwcr;
};
struct kvm_lpage_info {
@@ -1168,7 +1172,8 @@ struct kvm_x86_ops {
uint32_t guest_irq, bool set);
void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
- int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc);
+ int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
+ bool *expired);
void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
void (*setup_mce)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 88dd202c8b00..979ef971cc78 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -789,6 +789,14 @@
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+/* PERF_GLOBAL_OVF_CTL bits */
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT 62
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF_BIT)
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT 63
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD_BIT)
+
/* Geode defined MSRs */
#define MSR_GEODE_BUSCONT_CONF0 0x00001900
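These definitions pair with the new global_ovf_ctrl_mask field added to struct kvm_pmu below. A hedged sketch (not the patch's actual pmu_intel.c hunk, which is outside this excerpt) of how such a reserved-bit mask could be derived from them:

        /* Sketch: start from the GLOBAL_CTRL reserved-bit mask and additionally
         * allow the buffer-overflow and cond-changed status-clear bits, so any
         * other bit written to GLOBAL_OVF_CTRL is treated as reserved. */
        pmu->global_ovf_ctrl_mask = pmu->global_ctrl_mask &
                        ~(MSR_CORE_PERF_GLOBAL_OVF_CTRL_OVF_BUF |
                          MSR_CORE_PERF_GLOBAL_OVF_CTRL_COND_CHGD);
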
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 2879e234e193..76dd605ee2a3 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -73,12 +73,13 @@ EXPORT_SYMBOL(pci_mem_start);
* This function checks if any part of the range <start,end> is mapped
* with type.
*/
-bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+static bool _e820__mapped_any(struct e820_table *table,
+ u64 start, u64 end, enum e820_type type)
{
int i;
- for (i = 0; i < e820_table->nr_entries; i++) {
- struct e820_entry *entry = &e820_table->entries[i];
+ for (i = 0; i < table->nr_entries; i++) {
+ struct e820_entry *entry = &table->entries[i];
if (type && entry->type != type)
continue;
@@ -88,6 +89,17 @@ bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
}
return 0;
}
+
+bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table_firmware, start, end, type);
+}
+EXPORT_SYMBOL_GPL(e820__mapped_raw_any);
+
+bool e820__mapped_any(u64 start, u64 end, enum e820_type type)
+{
+ return _e820__mapped_any(e820_table, start, end, type);
+}
EXPORT_SYMBOL_GPL(e820__mapped_any);
/*
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bbbe611f0c49..80a642a0143d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -963,13 +963,13 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
return 1;
- eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
- ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
+ eax = kvm_rax_read(vcpu);
+ ecx = kvm_rcx_read(vcpu);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx, true);
- kvm_register_write(vcpu, VCPU_REGS_RAX, eax);
- kvm_register_write(vcpu, VCPU_REGS_RBX, ebx);
- kvm_register_write(vcpu, VCPU_REGS_RCX, ecx);
- kvm_register_write(vcpu, VCPU_REGS_RDX, edx);
+ kvm_rax_write(vcpu, eax);
+ kvm_rbx_write(vcpu, ebx);
+ kvm_rcx_write(vcpu, ecx);
+ kvm_rdx_write(vcpu, edx);
return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cc24b3a32c44..8ca4b39918e0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1535,10 +1535,10 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
longmode = is_64_bit_mode(vcpu);
if (longmode)
- kvm_register_write(vcpu, VCPU_REGS_RAX, result);
+ kvm_rax_write(vcpu, result);
else {
- kvm_register_write(vcpu, VCPU_REGS_RDX, result >> 32);
- kvm_register_write(vcpu, VCPU_REGS_RAX, result & 0xffffffff);
+ kvm_rdx_write(vcpu, result >> 32);
+ kvm_rax_write(vcpu, result & 0xffffffff);
}
}
@@ -1611,18 +1611,18 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
longmode = is_64_bit_mode(vcpu);
if (!longmode) {
- param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff);
- ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff);
- outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) |
- (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff);
+ param = ((u64)kvm_rdx_read(vcpu) << 32) |
+ (kvm_rax_read(vcpu) & 0xffffffff);
+ ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
+ (kvm_rcx_read(vcpu) & 0xffffffff);
+ outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
+ (kvm_rsi_read(vcpu) & 0xffffffff);
}
#ifdef CONFIG_X86_64
else {
- param = kvm_register_read(vcpu, VCPU_REGS_RCX);
- ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX);
- outgpa = kvm_register_read(vcpu, VCPU_REGS_R8);
+ param = kvm_rcx_read(vcpu);
+ ingpa = kvm_rdx_read(vcpu);
+ outgpa = kvm_r8_read(vcpu);
}
#endif
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f8f56a93358b..1cc6c47dc77e 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,6 +9,34 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_PGE)
+#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
+static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
+{ \
+ return vcpu->arch.regs[VCPU_REGS_##uname]; \
+} \
+static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
+ unsigned long val) \
+{ \
+ vcpu->arch.regs[VCPU_REGS_##uname] = val; \
+}
+BUILD_KVM_GPR_ACCESSORS(rax, RAX)
+BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
+BUILD_KVM_GPR_ACCESSORS(rcx, RCX)
+BUILD_KVM_GPR_ACCESSORS(rdx, RDX)
+BUILD_KVM_GPR_ACCESSORS(rbp, RBP)
+BUILD_KVM_GPR_ACCESSORS(rsi, RSI)
+BUILD_KVM_GPR_ACCESSORS(rdi, RDI)
+#ifdef CONFIG_X86_64
+BUILD_KVM_GPR_ACCESSORS(r8, R8)
+BUILD_KVM_GPR_ACCESSORS(r9, R9)
+BUILD_KVM_GPR_ACCESSORS(r10, R10)
+BUILD_KVM_GPR_ACCESSORS(r11, R11)
+BUILD_KVM_GPR_ACCESSORS(r12, R12)
+BUILD_KVM_GPR_ACCESSORS(r13, R13)
+BUILD_KVM_GPR_ACCESSORS(r14, R14)
+BUILD_KVM_GPR_ACCESSORS(r15, R15)
+#endif
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@@ -37,6 +65,16 @@ static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val)
kvm_register_write(vcpu, VCPU_REGS_RIP, val);
}
+static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu)
+{
+ return kvm_register_read(vcpu, VCPU_REGS_RSP);
+}
+
+static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ kvm_register_write(vcpu, VCPU_REGS_RSP, val);
+}
+
static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
{
might_sleep(); /* on svm */
@@ -83,8 +121,8 @@ static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
{
- return (kvm_register_read(vcpu, VCPU_REGS_RAX) & -1u)
- | ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
+ return (kvm_rax_read(vcpu) & -1u)
+ | ((u64)(kvm_rdx_read(vcpu) & -1u) << 32);
}
static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
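For reference, each BUILD_KVM_GPR_ACCESSORS() invocation above expands to a trivial read/write pair; written out by hand for rax, the generated helpers are simply:

        static __always_inline unsigned long kvm_rax_read(struct kvm_vcpu *vcpu)
        {
                return vcpu->arch.regs[VCPU_REGS_RAX];
        }

        static __always_inline void kvm_rax_write(struct kvm_vcpu *vcpu,
                                                  unsigned long val)
        {
                vcpu->arch.regs[VCPU_REGS_RAX] = val;
        }
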
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index bd13fdddbdc4..4924f83ed4f3 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1454,7 +1454,7 @@ static void apic_timer_expired(struct kvm_lapic *apic)
if (swait_active(q))
swake_up_one(q);
- if (apic_lvtt_tscdeadline(apic))
+ if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
ktimer->expired_tscdeadline = ktimer->tscdeadline;
}
@@ -1696,37 +1696,42 @@ static void cancel_hv_timer(struct kvm_lapic *apic)
static bool start_hv_timer(struct kvm_lapic *apic)
{
struct kvm_timer *ktimer = &apic->lapic_timer;
- int r;
+ struct kvm_vcpu *vcpu = apic->vcpu;
+ bool expired;
WARN_ON(preemptible());
if (!kvm_x86_ops->set_hv_timer)
return false;
- if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
- return false;
-
if (!ktimer->tscdeadline)
return false;
- r = kvm_x86_ops->set_hv_timer(apic->vcpu, ktimer->tscdeadline);
- if (r < 0)
+ if (kvm_x86_ops->set_hv_timer(vcpu, ktimer->tscdeadline, &expired))
return false;
ktimer->hv_timer_in_use = true;
hrtimer_cancel(&ktimer->timer);
/*
- * Also recheck ktimer->pending, in case the sw timer triggered in
- * the window. For periodic timer, leave the hv timer running for
- * simplicity, and the deadline will be recomputed on the next vmexit.
+ * To simplify handling the periodic timer, leave the hv timer running
+ * even if the deadline timer has expired, i.e. rely on the resulting
+ * VM-Exit to recompute the periodic timer's target expiration.
*/
- if (!apic_lvtt_period(apic) && (r || atomic_read(&ktimer->pending))) {
- if (r)
+ if (!apic_lvtt_period(apic)) {
+ /*
+ * Cancel the hv timer if the sw timer fired while the hv timer
+ * was being programmed, or if the hv timer itself expired.
+ */
+ if (atomic_read(&ktimer->pending)) {
+ cancel_hv_timer(apic);
+ } else if (expired) {
apic_timer_expired(apic);
- return false;
+ cancel_hv_timer(apic);
+ }
}
- trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, true);
+ trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
+
return true;
}
@@ -1750,8 +1755,13 @@ static void start_sw_timer(struct kvm_lapic *apic)
static void restart_apic_timer(struct kvm_lapic *apic)
{
preempt_disable();
+
+ if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
+ goto out;
+
if (!start_hv_timer(apic))
start_sw_timer(apic);
+out:
preempt_enable();
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d9c7b45d231f..1e9ba81accba 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -44,6 +44,7 @@
#include <asm/page.h>
#include <asm/pat.h>
#include <asm/cmpxchg.h>
+#include <asm/e820/api.h>
#include <asm/io.h>
#include <asm/vmx.h>
#include <asm/kvm_page_track.h>
@@ -487,16 +488,24 @@ static void kvm_mmu_reset_all_pte_masks(void)
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
* assumed that the CPU is not vulnerable to L1TF.
+ *
+ * Some Intel CPUs address the L1 cache using more PA bits than are
+ * reported by CPUID. Use the PA width of the L1 cache when possible
+ * to achieve more effective mitigation, e.g. if system RAM overlaps
+ * the most significant bits of legal physical address space.
*/
- low_phys_bits = boot_cpu_data.x86_phys_bits;
- if (boot_cpu_data.x86_phys_bits <
+ shadow_nonpresent_or_rsvd_mask = 0;
+ low_phys_bits = boot_cpu_data.x86_cache_bits;
+ if (boot_cpu_data.x86_cache_bits <
52 - shadow_nonpresent_or_rsvd_mask_len) {
shadow_nonpresent_or_rsvd_mask =
- rsvd_bits(boot_cpu_data.x86_phys_bits -
+ rsvd_bits(boot_cpu_data.x86_cache_bits -
shadow_nonpresent_or_rsvd_mask_len,
- boot_cpu_data.x86_phys_bits - 1);
+ boot_cpu_data.x86_cache_bits - 1);
low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
- }
+ } else
+ WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+
shadow_nonpresent_or_rsvd_lower_gfn_mask =
GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);
}
@@ -2892,7 +2901,9 @@ static bool kvm_is_mmio_pfn(kvm_pfn_t pfn)
*/
(!pat_enabled() || pat_pfn_immune_to_uc_mtrr(pfn));
- return true;
+ return !e820__mapped_raw_any(pfn_to_hpa(pfn),
+ pfn_to_hpa(pfn + 1) - 1,
+ E820_TYPE_RAM);
}
/* Bits which may be returned by set_spte() */
diff --git a/arch/x86/kvm/mtrr.c b/arch/x86/kvm/mtrr.c
index e9ea2d45ae66..9f72cc427158 100644
--- a/arch/x86/kvm/mtrr.c
+++ b/arch/x86/kvm/mtrr.c
@@ -48,11 +48,6 @@ static bool msr_mtrr_valid(unsigned msr)
return false;
}
-static bool valid_pat_type(unsigned t)
-{
- return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
-}
-
static bool valid_mtrr_type(unsigned t)
{
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
@@ -67,10 +62,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return false;
if (msr == MSR_IA32_CR_PAT) {
- for (i = 0; i < 8; i++)
- if (!valid_pat_type((data >> (i * 8)) & 0xff))
- return false;
- return true;
+ return kvm_pat_valid(data);
} else if (msr == MSR_MTRRdefType) {
if (data & ~0xcff)
return false;
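The open-coded PAT check is replaced by kvm_pat_valid(), which per the diffstat moves into x86.h so it can be shared. A sketch of equivalent validation, mirroring the removed valid_pat_type() logic rather than claiming to be the exact new helper:

        /* Sketch: each of the eight PAT entries must encode one of the valid
         * memory types 0, 1, 4, 5, 6 or 7 (mask 0xf3); types 2 and 3 are
         * reserved and must be rejected. */
        static bool pat_valid_sketch(u64 data)
        {
                int i;

                for (i = 0; i < 8; i++) {
                        unsigned int type = (data >> (i * 8)) & 0xff;

                        if (type >= 8 || !((1u << type) & 0xf3))
                                return false;
                }
                return true;
        }
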
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 08715034e315..367a47df4ba0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -141,15 +141,35 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
struct page *page;
npages = get_user_pages_fast((unsigned long)ptep_user, 1, FOLL_WRITE, &page);
- /* Check if the user is doing something meaningless. */
- if (unlikely(npages != 1))
- return -EFAULT;
-
- table = kmap_atomic(page);
- ret = CMPXCHG(&table[index], orig_pte, new_pte);
- kunmap_atomic(table);
-
- kvm_release_page_dirty(page);
+ if (likely(npages == 1)) {
+ table = kmap_atomic(page);
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ kunmap_atomic(table);
+
+ kvm_release_page_dirty(page);
+ } else {
+ struct vm_area_struct *vma;
+ unsigned long vaddr = (unsigned long)ptep_user & PAGE_MASK;
+ unsigned long pfn;
+ unsigned long paddr;
+
+ down_read(&current->mm->mmap_sem);
+ vma = find_vma_intersection(current->mm, vaddr, vaddr + PAGE_SIZE);
+ if (!vma || !(vma->vm_flags & VM_PFNMAP)) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ pfn = ((vaddr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ paddr = pfn << PAGE_SHIFT;
+ table = memremap(paddr, PAGE_SIZE, MEMREMAP_WB);
+ if (!table) {
+ up_read(&current->mm->mmap_sem);
+ return -EFAULT;
+ }
+ ret = CMPXCHG(&table[index], orig_pte, new_pte);
+ memunmap(table);
+ up_read(&current->mm->mmap_sem);
+ }
return (ret != orig_pte);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6b92eaf4a3b1..a849dcb7fbc5 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2091,7 +2091,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
init_vmcb(svm);
kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, true);
- kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
+ kvm_rdx_write(vcpu, eax);
if (kvm_vcpu_apicv_active(vcpu) && !init_event)
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
@@ -3071,32 +3071,6 @@ static inline bool nested_svm_nmi(struct vcpu_svm *svm)
return false;
}
-static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
-{
- struct page *page;
-
- might_sleep();
-
- page = kvm_vcpu_gfn_to_page(&svm->vcpu, gpa >> PAGE_SHIFT);
- if (is_error_page(page))
- goto error;
-
- *_page = page;
-
- return kmap(page);
-
-error:
- kvm_inject_gp(&svm->vcpu, 0);
-
- return NULL;
-}
-
-static void nested_svm_unmap(struct page *page)
-{
- kunmap(page);
- kvm_release_page_dirty(page);
-}
-
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
unsigned port, size, iopm_len;
@@ -3299,10 +3273,11 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
static int nested_svm_vmexit(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
vmcb->control.exit_info_1,
@@ -3311,9 +3286,14 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
- nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
/* Exit Guest-Mode */
leave_guest_mode(&svm->vcpu);
@@ -3408,16 +3388,16 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
} else {
(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
}
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
+ kvm_rax_write(&svm->vcpu, hsave->save.rax);
+ kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
+ kvm_rip_write(&svm->vcpu, hsave->save.rip);
svm->vmcb->save.dr7 = 0;
svm->vmcb->save.cpl = 0;
svm->vmcb->control.exit_int_info = 0;
mark_all_dirty(svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
nested_svm_uninit_mmu_context(&svm->vcpu);
kvm_mmu_reset_context(&svm->vcpu);
@@ -3483,7 +3463,7 @@ static bool nested_vmcb_checks(struct vmcb *vmcb)
}
static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- struct vmcb *nested_vmcb, struct page *page)
+ struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
svm->vcpu.arch.hflags |= HF_HIF_MASK;
@@ -3516,9 +3496,9 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
kvm_mmu_reset_context(&svm->vcpu);
svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
+ kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
+ kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
+ kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
svm->vmcb->save.rax = nested_vmcb->save.rax;
@@ -3567,7 +3547,7 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
svm->vmcb->control.pause_filter_thresh =
nested_vmcb->control.pause_filter_thresh;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, map, true);
/* Enter Guest-Mode */
enter_guest_mode(&svm->vcpu);
@@ -3587,17 +3567,23 @@ static void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
static bool nested_svm_vmrun(struct vcpu_svm *svm)
{
+ int rc;
struct vmcb *nested_vmcb;
struct vmcb *hsave = svm->nested.hsave;
struct vmcb *vmcb = svm->vmcb;
- struct page *page;
+ struct kvm_host_map map;
u64 vmcb_gpa;
vmcb_gpa = svm->vmcb->save.rax;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
+ if (rc) {
+ if (rc == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return false;
+ }
+
+ nested_vmcb = map.hva;
if (!nested_vmcb_checks(nested_vmcb)) {
nested_vmcb->control.exit_code = SVM_EXIT_ERR;
@@ -3605,7 +3591,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
nested_vmcb->control.exit_info_1 = 0;
nested_vmcb->control.exit_info_2 = 0;
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return false;
}
@@ -3649,7 +3635,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
copy_vmcb_control_area(hsave, vmcb);
- enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, page);
+ enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);
return true;
}
@@ -3673,21 +3659,26 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
static int vmload_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3695,21 +3686,26 @@ static int vmload_interception(struct vcpu_svm *svm)
static int vmsave_interception(struct vcpu_svm *svm)
{
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
int ret;
if (nested_svm_check_permissions(svm))
return 1;
- nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
- if (!nested_vmcb)
+ ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->vmcb->save.rax), &map);
+ if (ret) {
+ if (ret == -EINVAL)
+ kvm_inject_gp(&svm->vcpu, 0);
return 1;
+ }
+
+ nested_vmcb = map.hva;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
ret = kvm_skip_emulated_instruction(&svm->vcpu);
nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
- nested_svm_unmap(page);
+ kvm_vcpu_unmap(&svm->vcpu, &map, true);
return ret;
}
@@ -3791,11 +3787,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
- kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_rcx_read(&svm->vcpu),
+ kvm_rax_read(&svm->vcpu));
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
- kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ kvm_mmu_invlpg(vcpu, kvm_rax_read(&svm->vcpu));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3803,7 +3799,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
static int skinit_interception(struct vcpu_svm *svm)
{
- trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
+ trace_kvm_skinit(svm->vmcb->save.rip, kvm_rax_read(&svm->vcpu));
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@@ -3817,7 +3813,7 @@ static int wbinvd_interception(struct vcpu_svm *svm)
static int xsetbv_interception(struct vcpu_svm *svm)
{
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
- u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 index = kvm_rcx_read(&svm->vcpu);
if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
@@ -4213,7 +4209,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int rdmsr_interception(struct vcpu_svm *svm)
{
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
struct msr_data msr_info;
msr_info.index = ecx;
@@ -4225,10 +4221,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
} else {
trace_kvm_msr_read(ecx, msr_info.data);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RAX,
- msr_info.data & 0xffffffff);
- kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
- msr_info.data >> 32);
+ kvm_rax_write(&svm->vcpu, msr_info.data & 0xffffffff);
+ kvm_rdx_write(&svm->vcpu, msr_info.data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
return kvm_skip_emulated_instruction(&svm->vcpu);
}
@@ -4422,7 +4416,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
struct msr_data msr;
- u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u32 ecx = kvm_rcx_read(&svm->vcpu);
u64 data = kvm_read_edx_eax(&svm->vcpu);
msr.data = data;
@@ -6236,7 +6230,7 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *nested_vmcb;
- struct page *page;
+ struct kvm_host_map map;
u64 guest;
u64 vmcb;
@@ -6244,10 +6238,10 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
vmcb = GET_SMSTATE(u64, smstate, 0x7ee0);
if (guest) {
- nested_vmcb = nested_svm_map(svm, vmcb, &page);
- if (!nested_vmcb)
+ if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL)
return 1;
- enter_svm_guest_mode(svm, vmcb, nested_vmcb, page);
+ nested_vmcb = map.hva;
+ enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map);
}
return 0;
}
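Every nested_svm_map()/nested_svm_unmap() call site above is converted to the kvm_vcpu_map() API introduced by this series, which also works for guest memory not backed by struct page. Condensed from the hunks above, a typical caller now follows this shape:

        struct kvm_host_map map;
        int rc;

        rc = kvm_vcpu_map(vcpu, gpa_to_gfn(gpa), &map);
        if (rc) {
                if (rc == -EINVAL)      /* bad GPA: reflect a #GP into the guest */
                        kvm_inject_gp(vcpu, 0);
                return 1;
        }

        nested_vmcb = map.hva;
        /* ... read or modify the mapped VMCB ... */
        kvm_vcpu_unmap(vcpu, &map, true);       /* true: mark the page dirty */
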
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 854e144131c6..d6664ee3d127 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -2,6 +2,8 @@
#ifndef __KVM_X86_VMX_CAPS_H
#define __KVM_X86_VMX_CAPS_H
+#include <asm/vmx.h>
+
#include "lapic.h"
extern bool __read_mostly enable_vpid;
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c601d079cd2..f1a69117ac0f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -193,10 +193,8 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
if (!vmx->nested.hv_evmcs)
return;
- kunmap(vmx->nested.hv_evmcs_page);
- kvm_release_page_dirty(vmx->nested.hv_evmcs_page);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
vmx->nested.hv_evmcs_vmptr = -1ull;
- vmx->nested.hv_evmcs_page = NULL;
vmx->nested.hv_evmcs = NULL;
}
@@ -229,16 +227,9 @@ static void free_nested(struct kvm_vcpu *vcpu)
kvm_release_page_dirty(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = NULL;
}
- if (vmx->nested.virtual_apic_page) {
- kvm_release_page_dirty(vmx->nested.virtual_apic_page);
- vmx->nested.virtual_apic_page = NULL;
- }
- if (vmx->nested.pi_desc_page) {
- kunmap(vmx->nested.pi_desc_page);
- kvm_release_page_dirty(vmx->nested.pi_desc_page);
- vmx->nested.pi_desc_page = NULL;
- vmx->nested.pi_desc = NULL;
- }
+ kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
+ vmx->nested.pi_des