summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/api.rst216
-rw-r--r--Documentation/virt/kvm/cpuid.rst88
-rw-r--r--Documentation/virt/kvm/devices/vcpu.rst57
-rw-r--r--arch/arm64/include/asm/assembler.h29
-rw-r--r--arch/arm64/include/asm/hyp_image.h36
-rw-r--r--arch/arm64/include/asm/kvm_asm.h187
-rw-r--r--arch/arm64/include/asm/kvm_host.h35
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h9
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h251
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h309
-rw-r--r--arch/arm64/include/asm/kvm_ptrauth.h6
-rw-r--r--arch/arm64/include/asm/percpu.h28
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h24
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h19
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h215
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h16
-rw-r--r--arch/arm64/kernel/image-vars.h3
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S13
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/arm.c70
-rw-r--r--arch/arm64/kvm/hyp.S34
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/entry.S95
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S76
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h4
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h15
-rw-r--r--arch/arm64/kvm/hyp/nvhe/.gitignore2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile62
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S187
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-init.S67
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c117
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp.lds.S19
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c48
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c892
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c31
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/inject_fault.c1
-rw-r--r--arch/arm64/kvm/mmu.c1611
-rw-r--r--arch/arm64/kvm/pmu-emul.c195
-rw-r--r--arch/arm64/kvm/pmu.c13
-rw-r--r--arch/arm64/kvm/reset.c40
-rw-r--r--arch/arm64/kvm/sys_regs.c5
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c24
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c4
-rw-r--r--arch/mips/include/asm/kvm_host.h4
-rw-r--r--arch/mips/kvm/entry.c21
-rw-r--r--arch/mips/kvm/mips.c3
-rw-r--r--arch/mips/kvm/trap_emul.c2
-rw-r--r--arch/mips/kvm/vz.c2
-rw-r--r--arch/powerpc/include/asm/kvm_host.h1
-rw-r--r--arch/powerpc/kvm/book3s.c8
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_radix.c2
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c4
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c24
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S9
-rw-r--r--arch/powerpc/kvm/book3s_hv_nested.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_pr.c2
-rw-r--r--arch/powerpc/kvm/book3s_xics.c86
-rw-r--r--arch/powerpc/kvm/book3s_xive_native.c12
-rw-r--r--arch/powerpc/kvm/booke.c6
-rw-r--r--arch/x86/include/asm/kvm_host.h70
-rw-r--r--arch/x86/include/asm/svm.h90
-rw-r--r--arch/x86/include/asm/vmx.h2
-rw-r--r--arch/x86/include/uapi/asm/kvm.h20
-rw-r--r--arch/x86/include/uapi/asm/svm.h2
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/Makefile6
-rw-r--r--arch/x86/kvm/cpuid.c137
-rw-r--r--arch/x86/kvm/cpuid.h15
-rw-r--r--arch/x86/kvm/emulate.c22
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h2
-rw-r--r--arch/x86/kvm/lapic.c43
-rw-r--r--arch/x86/kvm/lapic.h1
-rw-r--r--arch/x86/kvm/mmu.h5
-rw-r--r--arch/x86/kvm/mmu/mmu.c974
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h88
-rw-r--r--arch/x86/kvm/mmu/mmutrace.h21
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h50
-rw-r--r--arch/x86/kvm/mmu/spte.c318
-rw-r--r--arch/x86/kvm/mmu/spte.h252
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.c182
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h60
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c1157
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h48
-rw-r--r--arch/x86/kvm/svm/avic.c7
-rw-r--r--arch/x86/kvm/svm/nested.c351
-rw-r--r--arch/x86/kvm/svm/sev.c26
-rw-r--r--arch/x86/kvm/svm/svm.c404
-rw-r--r--arch/x86/kvm/svm/svm.h103
-rw-r--r--arch/x86/kvm/trace.h128
-rw-r--r--arch/x86/kvm/vmx/capabilities.h10
-rw-r--r--arch/x86/kvm/vmx/nested.c178
-rw-r--r--arch/x86/kvm/vmx/posted_intr.c332
-rw-r--r--arch/x86/kvm/vmx/posted_intr.h99
-rw-r--r--arch/x86/kvm/vmx/vmcs.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S34
-rw-r--r--arch/x86/kvm/vmx/vmx.c1259
-rw-r--r--arch/x86/kvm/vmx/vmx.h143
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h (renamed from arch/x86/kvm/vmx/ops.h)0
-rw-r--r--arch/x86/kvm/x86.c694
-rw-r--r--arch/x86/kvm/x86.h5
-rw-r--r--arch/x86/mm/fault.c13
-rw-r--r--include/kvm/arm_pmu.h5
-rw-r--r--include/linux/arm-smccc.h74
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/trace/events/kvm.h2
-rw-r--r--include/uapi/linux/kvm.h19
-rw-r--r--tools/testing/selftests/kvm/.gitignore1
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h2
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c168
-rw-r--r--tools/testing/selftests/kvm/x86_64/user_msr_test.c248
-rw-r--r--virt/kvm/eventfd.c4
-rw-r--r--virt/kvm/kvm_main.c16
119 files changed, 8369 insertions, 4901 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1f26d83e6b16..36d5f1f3c6dd 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4498,11 +4498,14 @@ Currently, the following list of CPUID leaves are returned:
- HYPERV_CPUID_ENLIGHTMENT_INFO
- HYPERV_CPUID_IMPLEMENT_LIMITS
- HYPERV_CPUID_NESTED_FEATURES
+ - HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS
+ - HYPERV_CPUID_SYNDBG_INTERFACE
+ - HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES
HYPERV_CPUID_NESTED_FEATURES leaf is only exposed when Enlightened VMCS was
enabled on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
-Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
+Userspace invokes KVM_GET_SUPPORTED_HV_CPUID by passing a kvm_cpuid2 structure
with the 'nent' field indicating the number of entries in the variable-size
array 'entries'. If the number of entries is too low to describe all Hyper-V
feature leaves, an error (E2BIG) is returned. If the number is more or equal
@@ -4704,6 +4707,106 @@ KVM_PV_VM_VERIFY
Verify the integrity of the unpacked image. Only if this succeeds,
KVM is allowed to start protected VCPUs.
+4.126 KVM_X86_SET_MSR_FILTER
+----------------------------
+
+:Capability: KVM_X86_SET_MSR_FILTER
+:Architectures: x86
+:Type: vm ioctl
+:Parameters: struct kvm_msr_filter
+:Returns: 0 on success, < 0 on error
+
+::
+
+ struct kvm_msr_filter_range {
+ #define KVM_MSR_FILTER_READ (1 << 0)
+ #define KVM_MSR_FILTER_WRITE (1 << 1)
+ __u32 flags;
+ __u32 nmsrs; /* number of msrs in bitmap */
+ __u32 base; /* MSR index the bitmap starts at */
+ __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+ };
+
+ #define KVM_MSR_FILTER_MAX_RANGES 16
+ struct kvm_msr_filter {
+ #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+ #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+ __u32 flags;
+ struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+ };
+
+flags values for ``struct kvm_msr_filter_range``:
+
+``KVM_MSR_FILTER_READ``
+
+ Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
+ indicates that a read should immediately fail, while a 1 indicates that
+ a read for a particular MSR should be handled regardless of the default
+ filter action.
+
+``KVM_MSR_FILTER_WRITE``
+
+ Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
+ indicates that a write should immediately fail, while a 1 indicates that
+ a write for a particular MSR should be handled regardless of the default
+ filter action.
+
+``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
+
+ Filter both read and write accesses to MSRs using the given bitmap. A 0
+ in the bitmap indicates that both reads and writes should immediately fail,
+ while a 1 indicates that reads and writes for a particular MSR are not
+ filtered by this range.
+
+flags values for ``struct kvm_msr_filter``:
+
+``KVM_MSR_FILTER_DEFAULT_ALLOW``
+
+ If no filter range matches an MSR index that is getting accessed, KVM will
+ fall back to allowing access to the MSR.
+
+``KVM_MSR_FILTER_DEFAULT_DENY``
+
+ If no filter range matches an MSR index that is getting accessed, KVM will
+ fall back to rejecting access to the MSR. In this mode, all MSRs that should
+ be processed by KVM need to explicitly be marked as allowed in the bitmaps.
+
+This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
+specify whether a certain MSR access should be explicitly filtered for or not.
+
+If this ioctl has never been invoked, MSR accesses are not guarded and the
+default KVM in-kernel emulation behavior is fully preserved.
+
+Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
+filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
+an error.
+
+As soon as the filtering is in place, every MSR access is processed through
+the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
+x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
+and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
+register.
+
+If a bit is within one of the defined ranges, read and write accesses are
+guarded by the bitmap's value for the MSR index if the kind of access
+is included in the ``struct kvm_msr_filter_range`` flags. If no range
+cover this particular access, the behavior is determined by the flags
+field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
+and ``KVM_MSR_FILTER_DEFAULT_DENY``.
+
+Each bitmap range specifies a range of MSRs to potentially allow access on.
+The range goes from MSR index [base .. base+nmsrs]. The flags field
+indicates whether reads, writes or both reads and writes are filtered
+by setting a 1 bit in the bitmap for the corresponding MSR index.
+
+If an MSR access is not permitted through the filtering, it generates a
+#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
+allows user space to deflect and potentially handle various MSR accesses
+into user space.
+
+If a vCPU is in running state while this ioctl is invoked, the vCPU may
+experience inconsistent filtering behavior on MSR accesses.
+
5. The kvm_run structure
========================
@@ -4869,14 +4972,13 @@ to the byte array.
.. note::
- For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
- KVM_EXIT_EPR the corresponding
-
-operations are complete (and guest state is consistent) only after userspace
-has re-entered the kernel with KVM_RUN. The kernel side will first finish
-incomplete operations and then check for pending signals. Userspace
-can re-enter the guest with an unmasked signal pending to complete
-pending operations.
+ For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR,
+ KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding
+ operations are complete (and guest state is consistent) only after userspace
+ has re-entered the kernel with KVM_RUN. The kernel side will first finish
+ incomplete operations and then check for pending signals. Userspace
+ can re-enter the guest with an unmasked signal pending to complete
+ pending operations.
::
@@ -5165,6 +5267,44 @@ if it decides to decode and emulate the instruction.
::
+ /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
+ struct {
+ __u8 error; /* user -> kernel */
+ __u8 pad[7];
+ __u32 reason; /* kernel -> user */
+ __u32 index; /* kernel -> user */
+ __u64 data; /* kernel <-> user */
+ } msr;
+
+Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is
+enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code
+will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
+exit for writes.
+
+The "reason" field specifies why the MSR trap occurred. User space will only
+receive MSR exit traps when a particular reason was requested during through
+ENABLE_CAP. Currently valid exit reasons are:
+
+ KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
+ KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
+ KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER
+
+For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
+wants to read. To respond to this request with a successful read, user space
+writes the respective data into the "data" field and must continue guest
+execution to ensure the read data is transferred into guest register state.
+
+If the RDMSR request was unsuccessful, user space indicates that with a "1" in