author     Linus Torvalds <torvalds@linux-foundation.org>  2017-11-24 19:44:25 -1000
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-11-24 19:44:25 -1000
commit     7753ea09640807104c8e353f6d5dc47ee55307cf (patch)
tree       7db719affec0564593ed85a283de8fd9464177d3
parent     83ada0319606c6bbaebda16fad456e37ed64d518 (diff)
parent     d02fcf50779ec9d8eb7a81473fd76efe3f04b3a5 (diff)
Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Radim Krčmář:
 "Trimmed second batch of KVM changes for Linux 4.15:

   - GICv4 Support for KVM/ARM

   - re-introduce support for CPUs without virtual NMI (cc stable) and
     allow testing of KVM without virtual NMI on available CPUs

   - fix long-standing performance issues with assigned devices on AMD
     (cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  kvm: vmx: Allow disabling virtual NMI support
  kvm: vmx: Reinstate support for CPUs without virtual NMI
  KVM: SVM: obey guest PAT
  KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
  KVM: arm/arm64: Fix GICv4 ITS initialization issues
  KVM: arm/arm64: GICv4: Theory of operations
  KVM: arm/arm64: GICv4: Enable VLPI support
  KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
  KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
  KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
  KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
  KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
  KVM: arm/arm64: GICv4: Add doorbell interrupt handling
  KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
  KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
  KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
  KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
  KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
  KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
  KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
  ...
 Documentation/admin-guide/kernel-parameters.txt    |   4
 Documentation/virtual/kvm/devices/arm-vgic-its.txt |   2
 arch/arm/kvm/Kconfig                               |   5
 arch/arm/kvm/Makefile                              |   1
 arch/arm64/kvm/Kconfig                             |   3
 arch/arm64/kvm/Makefile                            |   1
 arch/x86/kvm/svm.c                                 |   7
 arch/x86/kvm/vmx.c                                 | 161
 include/kvm/arm_vgic.h                             |  41
 virt/kvm/arm/arch_timer.c                          |  24
 virt/kvm/arm/arm.c                                 |  48
 virt/kvm/arm/hyp/vgic-v3-sr.c                      |   9
 virt/kvm/arm/vgic/vgic-init.c                      |   7
 virt/kvm/arm/vgic/vgic-its.c                       | 204
 virt/kvm/arm/vgic/vgic-mmio-v3.c                   |   5
 virt/kvm/arm/vgic/vgic-v3.c                        |  14
 virt/kvm/arm/vgic/vgic-v4.c                        | 364
 virt/kvm/arm/vgic/vgic.c                           |  67
 virt/kvm/arm/vgic/vgic.h                           |  10
 19 files changed, 819 insertions(+), 158 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b44217290e57..6571fbfdb2a1 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1890,6 +1890,10 @@
[KVM,ARM] Trap guest accesses to GICv3 common
system registers
+ kvm-arm.vgic_v4_enable=
+ [KVM,ARM] Allow use of GICv4 for direct injection of
+ LPIs.
+
kvm-intel.ept= [KVM,Intel] Disable extended page tables
(virtualized MMU) support on capable Intel chips.
Default is 1 (enabled)
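
As a usage note: the hunk above adds a boolean kvm-arm parameter, so direct VLPI injection would be requested on the host kernel command line like this (=1 is the standard syntax for boolean module parameters; whether the feature defaults to off is not stated in the hunk and is an assumption here):

        kvm-arm.vgic_v4_enable=1
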
diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
index 8d5830eab26a..4f0c9fc40365 100644
--- a/Documentation/virtual/kvm/devices/arm-vgic-its.txt
+++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt
@@ -64,6 +64,8 @@ Groups:
-EINVAL: Inconsistent restored data
-EFAULT: Invalid guest ram access
-EBUSY: One or more VCPUS are running
+ -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
+ state is not available
KVM_DEV_ARM_VGIC_GRP_ITS_REGS
Attributes:
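
To illustrate where the new error code surfaces, here is a userspace sketch that saves the ITS tables via the KVM_DEV_ARM_VGIC_GRP_CTRL group documented in this file and handles the GICv4-backed case. The helper name and error handling are illustrative only; an arm64 host is assumed.

#include <errno.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Save the ITS tables of a VGIC ITS device; its_fd is the fd returned
 * by KVM_CREATE_DEVICE for KVM_DEV_TYPE_ARM_VGIC_ITS. */
static int save_its_tables(int its_fd)
{
        struct kvm_device_attr attr = {
                .group = KVM_DEV_ARM_VGIC_GRP_CTRL,
                .attr  = KVM_DEV_ARM_ITS_SAVE_TABLES,
        };

        if (ioctl(its_fd, KVM_SET_DEVICE_ATTR, &attr) == 0)
                return 0;

        if (errno == EACCES) {
                /* Virtual ITS backed by a physical GICv4 ITS: the state
                 * lives in hardware and cannot be extracted. */
                fprintf(stderr, "ITS state not available (GICv4)\n");
                return -EACCES;
        }
        return -errno;
}
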
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index f24628db5409..e2bd35b6780c 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -23,6 +24,8 @@ config KVM
select PREEMPT_NOTIFIERS
select ANON_INODES
select ARM_GIC
+ select ARM_GIC_V3
+ select ARM_GIC_V3_ITS
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
@@ -36,6 +39,8 @@ config KVM
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
select HAVE_KVM_MSI
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index f550abd64a25..48de846f2246 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
obj-y += $(KVM)/arm/vgic/vgic-v2.o
obj-y += $(KVM)/arm/vgic/vgic-v3.o
+obj-y += $(KVM)/arm/vgic/vgic-v4.o
obj-y += $(KVM)/arm/vgic/vgic-mmio.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 13f81f971390..2257dfcc44cc 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -4,6 +4,7 @@
#
source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
@@ -36,6 +37,8 @@ config KVM
select HAVE_KVM_MSI
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQ_ROUTING
+ select IRQ_BYPASS_MANAGER
+ select HAVE_KVM_IRQ_BYPASS
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 861acbbac385..87c4f7ae24de 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b71daed3cca2..59e13a79c2e3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
u32 ecx = msr->index;
u64 data = msr->data;
switch (ecx) {
+ case MSR_IA32_CR_PAT:
+ if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+ return 1;
+ vcpu->arch.pat = data;
+ svm->vmcb->save.g_pat = data;
+ mark_dirty(svm->vmcb, VMCB_NPT);
+ break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr);
break;
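
The kvm_mtrr_valid() call in the hunk above rejects undefined PAT encodings before the value reaches g_pat. Below is a standalone sketch of the validity rule it enforces, an assumption based on the SDM's defined PAT memory types rather than the kernel's exact implementation (which lives in arch/x86/kvm/mtrr.c):

#include <stdbool.h>
#include <stdint.h>

/* MSR_IA32_CR_PAT packs eight memory types, one per byte.
 * Defined types: UC=0, WC=1, WT=4, WP=5, WB=6, UC-=7; 2 and 3
 * are reserved and must be rejected. */
static bool pat_valid(uint64_t pat)
{
        int i;

        for (i = 0; i < 8; i++) {
                uint8_t type = (pat >> (8 * i)) & 0xff;

                if (type != 0 && type != 1 && type != 4 &&
                    type != 5 && type != 6 && type != 7)
                        return false;
        }
        return true;
}
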
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7c3522a989d0..714a0673ec3c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
static bool __read_mostly enable_vpid = 1;
module_param_named(vpid, enable_vpid, bool, 0444);
+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
static bool __read_mostly flexpriority_enabled = 1;
module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);
@@ -202,6 +205,10 @@ struct loaded_vmcs {
bool nmi_known_unmasked;
unsigned long vmcs_host_cr3; /* May not match real cr3 */
unsigned long vmcs_host_cr4; /* May not match real cr4 */
+ /* Support for vnmi-less CPUs */
+ int soft_vnmi_blocked;
+ ktime_t entry_time;
+ s64 vnmi_blocked_time;
struct list_head loaded_vmcss_on_cpu_link;
};
@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
+static inline bool cpu_has_virtual_nmis(void)
+{
+ return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
(vmcs12->secondary_vm_exec_control & bit);
}
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
- return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
{
return vmcs12->pin_based_vm_exec_control &
@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
- PIN_BASED_VIRTUAL_NMIS;
- opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+ opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+ PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!kvm_vcpu_apicv_active(&vmx->vcpu))
pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+ if (!enable_vnmi)
+ pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (!enable_vnmi ||
+ vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ if (!enable_vnmi) {
+ /*
+ * Tracking the NMI-blocked state in software is built upon
+ * finding the next open IRQ window. This, in turn, depends on
+ * well-behaving guests: They have to keep IRQs disabled at
+ * least as long as the NMI handler runs. Otherwise we may
+ * cause NMI nesting, maybe breaking the guest. But as this is
+ * highly unlikely, we can live with the residual risk.
+ */
+ vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+
++vcpu->stat.nmi_injections;
vmx->loaded_vmcs->nmi_known_unmasked = false;
@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool masked;
+ if (!enable_vnmi)
+ return vmx->loaded_vmcs->soft_vnmi_blocked;
if (vmx->loaded_vmcs->nmi_known_unmasked)
return false;
masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->loaded_vmcs->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
+ if (!enable_vnmi) {
+ if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+ vmx->loaded_vmcs->vnmi_blocked_time = 0;
+ }
+ } else {
+ vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ }
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
+ if (!enable_vnmi &&
+ to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+ return 0;
+
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
+ WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
if (!cpu_has_vmx_flexpriority())
flexpriority_enabled = 0;
+ if (!cpu_has_virtual_nmis())
+ enable_vnmi = 0;
+
/*
* set_apic_access_page_addr() is used to reload apic access
* page upon invalidation. No need to do anything if not
@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
}
/* Create a new VMCS */
- item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+ item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
if (!item)
return NULL;
item->vmcs02.vmcs = alloc_vmcs();
@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+ enable_vnmi &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked)) {
+ if (vmx_interrupt_allowed(vcpu)) {
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+ vcpu->arch.nmi_pending) {
+ /*
+ * This CPU gives us no way to find the end of an
+ * NMI-blocked window if the guest runs with IRQs
+ * disabled. So we pull the trigger after 1 s of
+ * futile waiting, but inform the user about this.
+ */
+ printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+ "state on VCPU %d after 1 s timeout\n",
+ __func__, vcpu->vcpu_id);
+ vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+ }
+ }
+
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (vmx->loaded_vmcs->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->loaded_vmcs->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
+ if (enable_vnmi) {
+ if (vmx->loaded_vmcs->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->loaded_vmcs->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
+ } else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->vnmi_blocked_time +=
+ ktime_to_ns(ktime_sub(ktime_get(),
+ vmx->loaded_vmcs->entry_time));
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr3, cr4;
+ /* Record the guest's net vcpu time for enforced NMI injections. */
+ if (unlikely(!enable_vnmi &&
+ vmx->loaded_vmcs->soft_vnmi_blocked))
+ vmx->loaded_vmcs->entry_time = ktime_get();
+
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
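
Taken together, the soft-vNMI pieces above implement a simple time budget: entry_time is stamped before entering the guest, vnmi_blocked_time accumulates across exits, and the blocked state is dropped once an IRQ window opens or one second elapses. A compressed, userspace-runnable sketch of that accounting follows; names mirror the patch, but this is illustrative, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct soft_vnmi {
        bool blocked;           /* mirrors soft_vnmi_blocked */
        int64_t blocked_ns;     /* mirrors vnmi_blocked_time */
        struct timespec entry;  /* mirrors entry_time */
};

static int64_t ns_since(const struct timespec *t)
{
        struct timespec now;

        clock_gettime(CLOCK_MONOTONIC, &now);
        return (now.tv_sec - t->tv_sec) * 1000000000LL +
               (now.tv_nsec - t->tv_nsec);
}

/* Corresponds to vmx_vcpu_run() stamping entry_time. */
static void pre_run(struct soft_vnmi *s)
{
        if (s->blocked)
                clock_gettime(CLOCK_MONOTONIC, &s->entry);
}

/* Corresponds to vmx_recover_nmi_blocking()/vmx_handle_exit()
 * deciding whether the NMI-blocked window has ended. */
static void post_run(struct soft_vnmi *s, bool irq_window_open)
{
        if (!s->blocked)
                return;

        s->blocked_ns += ns_since(&s->entry);
        if (irq_window_open || s->blocked_ns > 1000000000LL)
                s->blocked = false;     /* give up after 1 s, as above */
}
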
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 34dba516ef24..8c896540a72c 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -26,6 +26,8 @@
#include <linux/list.h>
#include <linux/jump_label.h>
+#include <linux/irqchip/arm-gic-v4.h>
+
#define VGIC_V3_MAX_CPUS 255
#define VGIC_V2_MAX_CPUS 8
#define VGIC_NR_IRQS_LEGACY 256
@@ -73,6 +75,9 @@ struct vgic_global {
/* Only needed for the legacy KVM_CREATE_IRQCHIP */
bool can_emulate_gicv2;
+ /* Hardware has GICv4? */
+ bool has_gicv4;
+
/* GIC system register CPU interface */
struct static_key_false gicv3_cpuif;
@@ -116,6 +121,7 @@ struct vgic_irq {
bool hw; /* Tied to HW IRQ */
struct kref refcount; /* Used for LPIs */
u32 hwintid; /* HW INTID number */
+ unsigned int host_irq; /* linux irq corresponding to hwintid */
union {
u8 targets; /* GICv2 target VCPUs mask */
u32 mpidr; /* GICv3 target VCPU */
@@ -232,6 +238,15 @@ struct vgic_dist {
/* used by vgic-debug */
struct vgic_state_iter *iter;
+
+ /*
+ * GICv4 ITS per-VM data, containing the IRQ domain, the VPE
+ * array, the property table pointer as well as allocation
+ * data. This essentially ties the Linux IRQ core and ITS
+ * together, and avoids leaking KVM's data structures anywhere
+ * else.
+ */
+ struct its_vm its_vm;
};
struct vgic_v2_cpu_if {
@@ -250,6 +265,14 @@ struct vgic_v3_cpu_if {
u32 vgic_ap0r[4];
u32 vgic_ap1r[4];
u64 vgic_lr[VGIC_V3_MAX_LRS];
+
+ /*
+ * GICv4 ITS per-VPE data, containing the doorbell IRQ, the
+ * pending table pointer, the its_vm pointer and a few other
+ * HW specific things. As for the its_vm structure, this is
+ * linking the Linux IRQ subsystem and the ITS together.
+ */
+ struct its_vpe its_vpe;
};
struct vgic_cpu {
@@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level, void *owner);
-int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+ u32 vintid);
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
@@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);
+struct kvm_kernel_irq_routing_entry;
+
+int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
+ struct kvm_kernel_irq_routing_entry *irq_entry);
+
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
+ struct kvm_kernel_irq_routing_entry *irq_entry);
+
+void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
+void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+
#endif /* __KVM_ARM_VGIC_H */
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 4db54ff08d9e..4151250ce8da 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
- struct irq_desc *desc;
- struct irq_data *data;
- int phys_irq;
int ret;
if (timer->enabled)
@@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- /*
- * Find the physical IRQ number corresponding to the host_vtimer_irq
- */
- desc = irq_to_desc(host_vtimer_irq);
- if (!desc) {
- kvm_err("%s: no interrupt descriptor\n", __func__);
- return -EINVAL;
- }
-
- data = irq_desc_get_irq_data(desc);
- while (data->parent_data)
- data = data->parent_data;
-
- phys_irq = data->hwirq;
-
- /*
- * Tell the VGIC that the virtual interrupt is tied to a
- * physical interrupt. We do that once per VCPU.
- */
- ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
+ ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
if (ret)
return ret;
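
The irq_to_desc() walk deleted above does not disappear; with the new kvm_vgic_map_phys_irq(vcpu, host_irq, vintid) signature, resolving the Linux IRQ to its root hwirq presumably becomes the vgic's job. A kernel-side sketch of that helper, reconstructed from the removed code (its exact name and location in vgic.c are assumptions):

#include <linux/irq.h>
#include <linux/irqdesc.h>

/* Resolve a Linux interrupt number to the hwirq at the root of its
 * domain hierarchy, as the deleted arch_timer code did by hand. */
static unsigned int host_irq_to_root_hwirq(unsigned int host_irq)
{
        struct irq_desc *desc = irq_to_desc(host_irq);
        struct irq_data *data;

        if (WARN_ON(!desc))
                return 0;

        data = irq_desc_get_irq_data(desc);
        while (data->parent_data)
                data = data->parent_data;

        return data->hwirq;
}
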
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 772bf74ac2e9..a6524ff27de4 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -27,6 +27,8 @@
#include <linux/mman.h>
#include <linux/sched.h>
#include <linux/kvm.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
#include <trace/events/kvm.h>
#include <kvm/arm_pmu.h>
@@ -175,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
int i;
+ kvm_vgic_destroy(kvm);
+
free_percpu(kvm->arch.last_vcpu_ran);
kvm->arch.last_vcpu_ran = NULL;
@@ -184,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm->vcpus[i] = NULL;
}
}
-
- kvm_vgic_destroy(kvm);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -313,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
kvm_timer_schedule(vcpu);
+ kvm_vgic_v4_enable_doorbell(vcpu);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
kvm_timer_unschedule(vcpu);
+ kvm_vgic_v4_disable_doorbell(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -1450,6 +1454,46 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
return NULL;
}
+bool kvm_arch_has_irq_bypass(void)
+{
+ return true;
+}
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
+ &irqfd->irq_entry);
+}
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+ struct irq_bypass_producer *prod)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
+ &irqfd->irq_entry);
+}
+
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ kvm_arm_halt_guest(irqfd->kvm);
+}
+
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
+{
+ struct kvm_kernel_irqfd *irqfd =
+ container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+ kvm_arm_resume_guest(irqfd->kvm);
+}
+
/**
* Initialize Hyp-mode and memory mappings on all CPUs.
*/
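
The hooks added above are not called by KVM/ARM code directly; the generic irqbypass manager invokes them once a producer (typically VFIO, for a passthrough device) and KVM's irqfd consumer register with the same eventfd token. A simplified sketch of the consumer-side registration, adapted from virt/kvm/eventfd.c (guarded there by CONFIG_HAVE_KVM_IRQ_BYPASS; the helper name here is invented for illustration):

#include <linux/irqbypass.h>
#include <linux/kvm_irqfd.h>

/* Roughly what irqfd_assign() does: advertise this irqfd as a bypass
 * consumer keyed by its eventfd, so that a matching producer triggers
 * kvm_arch_irq_bypass_add_producer() above. */
static void irqfd_register_bypass(struct kvm_kernel_irqfd *irqfd)
{
        irqfd->consumer.token = (void *)irqfd->eventfd;
        irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
        irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
        irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
        irqfd->consumer.start = kvm_arch_irq_bypass_start;

        if (irq_bypass_register_consumer(&irqfd->consumer))
                pr_info("irq bypass consumer (token %p) registration fails\n",
                        irqfd->consumer.token);
}
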
diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c
index 91728faa13fd..f5c3d6d7019e 100644
--- a/virt/kvm/arm/hyp/vgic-v3-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v3-sr.c
@@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
}
} else {
- if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
write_gicreg(0, ICH_HCR_EL2);
cpu_if->vgic_elrsr = 0xffff;
@@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
/*
* If we need to trap system registers, we must write
* ICH_HCR_EL2 anyway, even if no interrupts are being
- * injected,
+ * injected. Same thing if GICv4 is used, as VLPI
+ * delivery is gated by ICH_HCR_EL2.En.
*/
- if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+ if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+ cpu_if->its_vpe.its_vm)
write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 5801261f3add..62310122ee78 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -285,6 +285,10 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
+ ret = vgic_v4_init(kvm);
+ if (ret)
+ goto out;
+
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
@@ -320,6 +324,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
kfree(dist->spis);
dist->nr_spis = 0;
+
+ if (vgic_supports_direct_msis(kvm))
+ vgic_v4_teardown(kvm);
}
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index d2a99ab0ade7..1f761a9991e7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
- struct kvm_vcpu *filter_vcpu);
+ struct kvm_vcpu *filter_vcpu, bool needs_inv);
/*
* Creates a new (reference to a) struct vgic_irq for a given LPI.
@@ -106,7 +106,7 @@ out_unlock:
* However we only have those structs for mapped IRQs, so we read in
* the respective config data from memory here upon mapping the LPI.
*/
- ret = update_lpi_config(kvm, irq, NULL);
+ ret = update_lpi_config(kvm, irq, NULL, false);
if (ret)
return ERR_PTR(ret);
@@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
* VCPU. Unconditionally applies if filter_vcpu is NULL.
*/
static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
- struct kvm_vcpu *filter_vcpu)
+ struct kvm_vcpu *filter_vcpu, bool needs_inv)
{
u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
u8 prop;
@@ -292,11 +292,17 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
irq->priority = LPI_PROP_PRIORITY(prop);
irq->enabled = LPI_PROP_ENABLE_BIT(prop);
- vgic_queue_irq_unlock(kvm, irq, flags);
- } else {
- spin_unlock_irqrestore(&irq->irq_lock, flags);
+ if (!irq->hw) {
+ vgic_queue_irq_unlock(kvm, irq, flags);
+ return 0;
+ }
}
+ spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+ if (irq->hw)
+ return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
+
return 0;
}
@@ -336,6 +342,29 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
return i;
}
+static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
+{
+ int ret = 0;
+
+ spin_lock(&irq->irq_lock);
+ irq->target_vcpu = vcpu;
+ spin_unlock(&irq->irq_lock);
+
+ if (irq->hw) {
+ struct its_vlpi_map map;
+
+ ret = its_get_vlpi(irq->host_irq, &map);
+ if (ret)
+ return ret;
+
+ map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+ ret = its_map_vlpi(irq->host_irq, &map);
+ }
+
+ return ret;
+}
+
/*
* Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
* is targeting) to the VGIC's view, which deals with target VCPUs.
@@ -350,10 +379,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
return;
vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
-
- spin_lock(&ite->irq->irq_lock);
- ite->irq->target_vcpu = vcpu;
- spin_unlock(&ite->irq->irq_lock);
+ update_affinity(ite->irq, vcpu);
}
/*
@@ -505,19 +531,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
-/*
- * Find the target VCPU and the LPI number for a given devid/eventid pair
- * and make this IRQ pending, possibly injecting it.
- * Must be called with the its_lock mutex held.
- * Returns 0 on success, a positive error value for any ITS mapping
- * related errors and negative error values for generic errors.
- */
-static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
- u32 devid, u32 eventid)
+int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid, struct vgic_irq **irq)
{
struct kvm_vcpu *vcpu;
struct its_ite *ite;
- unsigned long flags;
if (!its->enabled)
return -EBUSY;
@@ -533,26 +551,65 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
if (!vcpu->arch.vgic_cpu.lpis_enabled)
return -EBUSY;
- spin_lock_irqsave(&ite->irq->irq_lock, flags);
- ite->irq->pending_latch = true;
- vgic_queue_irq_unlock(kvm, ite->irq, flags);
-
+ *irq = ite->irq;
return 0;
}
-static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
+ u64 address;
+ struct kvm_io_device *kvm_io_dev;
struct vgic_io_device *iodev;
- if (dev->ops != &kvm_io_gic_ops)
- return NULL;
+ if (!vgic_has_its(kvm))
+ return ERR_PTR(-ENODEV);
- iodev = container_of(dev, struct vgic_io_device, dev);
+ if (!(msi->flags & KVM_MSI_VALID_DEVID))
+ return ERR_PTR(-EINVAL);
+ address = (u64)msi->address_hi << 32 | msi->address_lo;
+
+ kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
+ if (!kvm_io_dev)
+ return ERR_PTR(-EINVAL);
+
+ if (kvm_io_dev->ops != &kvm_io_gic_ops)
+ return ERR_PTR(-EINVAL);
+
+ iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
if (iodev->iodev_type != IODEV_ITS)
- return NULL;
+ return ERR_PTR(-EINVAL);
+
+ return iodev->its;
+}
+
+/*
+ * Find the target VCPU and the LPI number for a given devid/eventid pair
+ * and make this IRQ pending, possibly injecting it.
+ * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
+ */
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+ u32 devid, u32 eventid)
+{
+ struct vgic_irq *irq = NULL;
+ unsigned long flags;