summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h472
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kvm/hyperv.c4
-rw-r--r--drivers/hv/channel.c58
-rw-r--r--drivers/hv/channel_mgmt.c439
-rw-r--r--drivers/hv/connection.c58
-rw-r--r--drivers/hv/hv.c16
-rw-r--r--drivers/hv/hv_fcopy.c2
-rw-r--r--drivers/hv/hv_snapshot.c2
-rw-r--r--drivers/hv/hv_trace.h25
-rw-r--r--drivers/hv/hyperv_vmbus.h81
-rw-r--r--drivers/hv/vmbus_drv.c314
-rw-r--r--drivers/net/hyperv/netvsc.c7
-rw-r--r--drivers/pci/controller/pci-hyperv.c44
-rw-r--r--drivers/scsi/storvsc_drv.c96
-rw-r--r--include/asm-generic/hyperv-tlfs.h493
-rw-r--r--include/linux/hyperv.h68
-rw-r--r--include/linux/mod_devicetable.h2
19 files changed, 1309 insertions, 875 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c2a782015644..660f5326a363 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7924,6 +7924,7 @@ F: drivers/pci/controller/pci-hyperv.c
F: drivers/scsi/storvsc_drv.c
F: drivers/uio/uio_hv_generic.c
F: drivers/video/fbdev/hyperv_fb.c
+F: include/asm-generic/hyperv-tlfs.h
F: include/asm-generic/mshyperv.h
F: include/clocksource/hyperv_timer.h
F: include/linux/hyperv.h
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 29336574d0bc..4e91f6118d5d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -11,17 +11,6 @@
#include <linux/types.h>
#include <asm/page.h>
-
-/*
- * While not explicitly listed in the TLFS, Hyper-V always runs with a page size
- * of 4096. These definitions are used when communicating with Hyper-V using
- * guest physical pages and guest physical page addresses, since the guest page
- * size may not be 4096 on all architectures.
- */
-#define HV_HYP_PAGE_SHIFT 12
-#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT)
-#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1))
-
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
@@ -39,78 +28,41 @@
#define HYPERV_CPUID_MAX 0x4000ffff
/*
- * Feature identification. EAX indicates which features are available
- * to the partition based upon the current partition privileges.
- * These are HYPERV_CPUID_FEATURES.EAX bits.
+ * Aliases for Group A features that have X64 in the name.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits.
*/
-/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
-#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0)
-/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
-#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1)
-/*
- * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
- * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
- */
-#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2)
-/*
- * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
- * HV_X64_MSR_STIMER3_COUNT) available
- */
-#define HV_MSR_SYNTIMER_AVAILABLE BIT(3)
-/*
- * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
- * are available
- */
-#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4)
-/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
-#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5)
-/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
-#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6)
-/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
-#define HV_X64_MSR_RESET_AVAILABLE BIT(7)
-/*
- * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
- * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
- * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
- */
-#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8)
-/* Partition reference TSC MSR is available */
-#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9)
-/* Partition Guest IDLE MSR is available */
-#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10)
-/*
- * There is a single feature flag that signifies if the partition has access
- * to MSRs with local APIC and TSC frequencies.
- */
-#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
-/* AccessReenlightenmentControls privilege */
-#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
-/* AccessTscInvariantControls privilege */
-#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
+#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \
+ HV_MSR_VP_RUNTIME_AVAILABLE
+#define HV_X64_MSR_SYNIC_AVAILABLE \
+ HV_MSR_SYNIC_AVAILABLE
+#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \
+ HV_MSR_APIC_ACCESS_AVAILABLE
+#define HV_X64_MSR_HYPERCALL_AVAILABLE \
+ HV_MSR_HYPERCALL_AVAILABLE
+#define HV_X64_MSR_VP_INDEX_AVAILABLE \
+ HV_MSR_VP_INDEX_AVAILABLE
+#define HV_X64_MSR_RESET_AVAILABLE \
+ HV_MSR_RESET_AVAILABLE
+#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \
+ HV_MSR_GUEST_IDLE_AVAILABLE
+#define HV_X64_ACCESS_FREQUENCY_MSRS \
+ HV_ACCESS_FREQUENCY_MSRS
+#define HV_X64_ACCESS_REENLIGHTENMENT \
+ HV_ACCESS_REENLIGHTENMENT
+#define HV_X64_ACCESS_TSC_INVARIANT \
+ HV_ACCESS_TSC_INVARIANT
/*
- * Feature identification: indicates which flags were specified at partition
- * creation. The format is the same as the partition creation flag structure
- * defined in section Partition Creation Flags.
- * These are HYPERV_CPUID_FEATURES.EBX bits.
+ * Aliases for Group B features that have X64 in the name.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits.
*/
-#define HV_X64_CREATE_PARTITIONS BIT(0)
-#define HV_X64_ACCESS_PARTITION_ID BIT(1)
-#define HV_X64_ACCESS_MEMORY_POOL BIT(2)
-#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3)
-#define HV_X64_POST_MESSAGES BIT(4)
-#define HV_X64_SIGNAL_EVENTS BIT(5)
-#define HV_X64_CREATE_PORT BIT(6)
-#define HV_X64_CONNECT_PORT BIT(7)
-#define HV_X64_ACCESS_STATS BIT(8)
-#define HV_X64_DEBUGGING BIT(11)
-#define HV_X64_CPU_POWER_MANAGEMENT BIT(12)
+#define HV_X64_POST_MESSAGES HV_POST_MESSAGES
+#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS
/*
- * Feature identification. EDX indicates which miscellaneous features
- * are available to the partition.
- * These are HYPERV_CPUID_FEATURES.EDX bits.
+ * Group D Features. The bit assignments are custom to each architecture.
+ * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
*/
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
#define HV_X64_MWAIT_AVAILABLE BIT(0)
@@ -187,7 +139,7 @@
* processor, except for virtual processors that are reported as sibling SMT
* threads.
*/
-#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
+#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
@@ -295,43 +247,6 @@ union hv_x64_msr_hypercall_contents {
} __packed;
};
-/*
- * TSC page layout.
- */
-struct ms_hyperv_tsc_page {
- volatile u32 tsc_sequence;
- u32 reserved1;
- volatile u64 tsc_scale;
- volatile s64 tsc_offset;
- u64 reserved2[509];
-} __packed;
-
-/*
- * The guest OS needs to register the guest ID with the hypervisor.
- * The guest ID is a 64 bit entity and the structure of this ID is
- * specified in the Hyper-V specification:
- *
- * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx
- *
- * While the current guideline does not specify how Linux guest ID(s)
- * need to be generated, our plan is to publish the guidelines for
- * Linux and other guest operating systems that currently are hosted
- * on Hyper-V. The implementation here conforms to this yet
- * unpublished guidelines.
- *
- *
- * Bit(s)
- * 63 - Indicates if the OS is Open Source or not; 1 is Open Source
- * 62:56 - Os Type; Linux is 0x100
- * 55:48 - Distro specific identification
- * 47:16 - Linux kernel version number
- * 15:0 - Distro specific identification
- *
- *
- */
-
-#define HV_LINUX_VENDOR_ID 0x8100
-
struct hv_reenlightenment_control {
__u64 vector:8;
__u64 reserved1:8;
@@ -355,31 +270,12 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-/*
- * Crash notification (HV_X64_MSR_CRASH_CTL) flags.
- */
-#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62)
-#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63)
#define HV_X64_MSR_CRASH_PARAMS \
(1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
#define HV_IPI_LOW_VECTOR 0x10
#define HV_IPI_HIGH_VECTOR 0xff
-/* Declare the various hypercall operations. */
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
-#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
-#define HVCALL_SEND_IPI 0x000b
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
-#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
-#define HVCALL_SEND_IPI_EX 0x0015
-#define HVCALL_POST_MESSAGE 0x005c
-#define HVCALL_SIGNAL_EVENT 0x005d
-#define HVCALL_RETARGET_INTERRUPT 0x007e
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
-#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
-
#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
@@ -391,75 +287,6 @@ struct hv_tsc_emulation_status {
#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-#define HV_PROCESSOR_POWER_STATE_C0 0
-#define HV_PROCESSOR_POWER_STATE_C1 1
-#define HV_PROCESSOR_POWER_STATE_C2 2
-#define HV_PROCESSOR_POWER_STATE_C3 3
-
-#define HV_FLUSH_ALL_PROCESSORS BIT(0)
-#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
-#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
-#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
-
-enum HV_GENERIC_SET_FORMAT {
- HV_GENERIC_SET_SPARSE_4K,
- HV_GENERIC_SET_ALL,
-};
-
-#define HV_PARTITION_ID_SELF ((u64)-1)
-
-#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0)
-#define HV_HYPERCALL_FAST_BIT BIT(16)
-#define HV_HYPERCALL_VARHEAD_OFFSET 17
-#define HV_HYPERCALL_REP_COMP_OFFSET 32
-#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
-#define HV_HYPERCALL_REP_START_OFFSET 48
-#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48)
-
-/* hypercall status code */
-#define HV_STATUS_SUCCESS 0
-#define HV_STATUS_INVALID_HYPERCALL_CODE 2
-#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
-#define HV_STATUS_INVALID_ALIGNMENT 4
-#define HV_STATUS_INVALID_PARAMETER 5
-#define HV_STATUS_INSUFFICIENT_MEMORY 11
-#define HV_STATUS_INVALID_PORT_ID 17
-#define HV_STATUS_INVALID_CONNECTION_ID 18
-#define HV_STATUS_INSUFFICIENT_BUFFERS 19
-
-/*
- * The Hyper-V TimeRefCount register and the TSC
- * page provide a guest VM clock with 100ns tick rate
- */
-#define HV_CLOCK_HZ (NSEC_PER_SEC/100)
-
-typedef struct _HV_REFERENCE_TSC_PAGE {
- __u32 tsc_sequence;
- __u32 res1;
- __u64 tsc_scale;
- __s64 tsc_offset;
-} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
-
-/* Define the number of synthetic interrupt sources. */
-#define HV_SYNIC_SINT_COUNT (16)
-/* Define the expected SynIC version. */
-#define HV_SYNIC_VERSION_1 (0x1)
-/* Valid SynIC vectors are 16-255. */
-#define HV_SYNIC_FIRST_VALID_VECTOR (16)
-
-#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIMP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0)
-#define HV_SYNIC_SINT_MASKED (1ULL << 16)
-#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17)
-#define HV_SYNIC_SINT_VECTOR_MASK (0xFF)
-
-#define HV_SYNIC_STIMER_COUNT (4)
-
-/* Define synthetic interrupt controller message constants. */
-#define HV_MESSAGE_SIZE (256)
-#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240)
-#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30)
/* Define hypervisor message types. */
enum hv_message_type {
@@ -470,76 +297,25 @@ enum hv_message_type {
HVMSG_GPA_INTERCEPT = 0x80000001,
/* Timer notification messages. */
- HVMSG_TIMER_EXPIRED = 0x80000010,
+ HVMSG_TIMER_EXPIRED = 0x80000010,
/* Error messages. */
HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020,
HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021,
- HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
+ HVMSG_UNSUPPORTED_FEATURE = 0x80000022,
/* Trace buffer complete messages. */
HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040,
/* Platform-specific processor intercept messages. */
- HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
+ HVMSG_X64_IOPORT_INTERCEPT = 0x80010000,
HVMSG_X64_MSR_INTERCEPT = 0x80010001,
- HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
+ HVMSG_X64_CPUID_INTERCEPT = 0x80010002,
HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003,
- HVMSG_X64_APIC_EOI = 0x80010004,
- HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
-};
-
-/* Define synthetic interrupt controller message flags. */
-union hv_message_flags {
- __u8 asu8;
- struct {
- __u8 msg_pending:1;
- __u8 reserved:7;
- } __packed;
-};
-
-/* Define port identifier type. */
-union hv_port_id {
- __u32 asu32;
- struct {
- __u32 id:24;
- __u32 reserved:8;
- } __packed u;
+ HVMSG_X64_APIC_EOI = 0x80010004,
+ HVMSG_X64_LEGACY_FP_ERROR = 0x80010005
};
-/* Define synthetic interrupt controller message header. */
-struct hv_message_header {
- __u32 message_type;
- __u8 payload_size;
- union hv_message_flags message_flags;
- __u8 reserved[2];
- union {
- __u64 sender;
- union hv_port_id port;
- };
-} __packed;
-
-/* Define synthetic interrupt controller message format. */
-struct hv_message {
- struct hv_message_header header;
- union {
- __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
- } u;
-} __packed;
-
-/* Define the synthetic interrupt message page layout. */
-struct hv_message_page {
- struct hv_message sint_message[HV_SYNIC_SINT_COUNT];
-} __packed;
-
-/* Define timer message payload structure. */
-struct hv_timer_message_payload {
- __u32 timer_index;
- __u32 reserved;
- __u64 expiration_time; /* When the timer expired */
- __u64 delivery_time; /* When the message was delivered */
-} __packed;
-
struct hv_nested_enlightenments_control {
struct {
__u32 directhypercall:1;
@@ -767,187 +543,11 @@ struct hv_enlightened_vmcs {
#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-/* Define synthetic interrupt controller flag constants. */
-#define HV_EVENT_FLAGS_COUNT (256 * 8)
-#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long))
-
-/*
- * Synthetic timer configuration.
- */
-union hv_stimer_config {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 periodic:1;
- u64 lazy:1;
- u64 auto_enable:1;
- u64 apic_vector:8;
- u64 direct_mode:1;
- u64 reserved_z0:3;
- u64 sintx:4;
- u64 reserved_z1:44;
- } __packed;
-};
-
-
-/* Define the synthetic interrupt controller event flags format. */
-union hv_synic_event_flags {
- unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT];
-};
-
-/* Define SynIC control register. */
-union hv_synic_scontrol {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:63;
- } __packed;
-};
-
-/* Define synthetic interrupt source. */
-union hv_synic_sint {
- u64 as_uint64;
- struct {
- u64 vector:8;
- u64 reserved1:8;
- u64 masked:1;
- u64 auto_eoi:1;
- u64 polling:1;
- u64 reserved2:45;
- } __packed;
-};
-
-/* Define the format of the SIMP register */
-union hv_synic_simp {
- u64 as_uint64;
- struct {
- u64 simp_enabled:1;
- u64 preserved:11;
- u64 base_simp_gpa:52;
- } __packed;
-};
-
-/* Define the format of the SIEFP register */
-union hv_synic_siefp {
- u64 as_uint64;
- struct {
- u64 siefp_enabled:1;
- u64 preserved:11;
- u64 base_siefp_gpa:52;
- } __packed;
-};
-
-struct hv_vpset {
- u64 format;
- u64 valid_bank_mask;
- u64 bank_contents[];
-} __packed;
-
-/* HvCallSendSyntheticClusterIpi hypercall */
-struct hv_send_ipi {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-} __packed;
-
-/* HvCallSendSyntheticClusterIpiEx hypercall */
-struct hv_send_ipi_ex {
- u32 vector;
- u32 reserved;
- struct hv_vpset vp_set;
-} __packed;
-
-/* HvFlushGuestPhysicalAddressSpace hypercalls */
-struct hv_guest_mapping_flush {
- u64 address_space;
- u64 flags;
-} __packed;
-
-/*
- * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited
- * by the bitwidth of "additional_pages" in union hv_gpa_page_range.
- */
-#define HV_MAX_FLUSH_PAGES (2048)
-
-/* HvFlushGuestPhysicalAddressList hypercall */
-union hv_gpa_page_range {
- u64 address_space;
- struct {
- u64 additional_pages:11;
- u64 largepage:1;
- u64 basepfn:52;
- } page;
-};
-
-/*
- * All input flush parameters should be in single page. The max flush
- * count is equal with how many entries of union hv_gpa_page_range can
- * be populated into the input parameter page.
- */
-#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \
- sizeof(union hv_gpa_page_range))
-
-struct hv_guest_mapping_flush_list {
- u64 address_space;
- u64 flags;
- union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT];
-};
-
-/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
-struct hv_tlb_flush {
- u64 address_space;
- u64 flags;
- u64 processor_mask;
- u64 gva_list[];
-} __packed;
-
-/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
-struct hv_tlb_flush_ex {
- u64 address_space;
- u64 flags;
- struct hv_vpset hv_vp_set;
- u64 gva_list[];
-} __packed;
-
struct hv_partition_assist_pg {
u32 tlb_lock_count;
};
-union hv_msi_entry {
- u64 as_uint64;
- struct {
- u32 address;
- u32 data;
- } __packed;
-};
-
-struct hv_interrupt_entry {
- u32 source; /* 1 for MSI(-X) */
- u32 reserved1;
- union hv_msi_entry msi_entry;
-} __packed;
-/*
- * flags for hv_device_interrupt_target.flags
- */
-#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1
-#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2
-
-struct hv_device_interrupt_target {
- u32 vector;
- u32 flags;
- union {
- u64 vp_mask;
- struct hv_vpset vp_set;
- };
-} __packed;
+#include <asm-generic/hyperv-tlfs.h>
-/* HvRetargetDeviceInterrupt hypercall */
-struct hv_retarget_device_interrupt {
- u64 partition_id; /* use "self" */
- u64 device_id;
- struct hv_interrupt_entry int_entry;
- u64 reserved2;
- struct hv_device_interrupt_target int_target;
-} __packed __aligned(8);
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e94b3de564d6..1c0b62d26962 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -866,7 +866,7 @@ struct kvm_hv {
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
u64 hv_crash_ctl;
- HV_REFERENCE_TSC_PAGE tsc_ref;
+ struct ms_hyperv_tsc_page tsc_ref;
struct idr conn_to_evt;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 54d4b98b49e1..6bc6d7613f76 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -900,7 +900,7 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu,
* These two equivalencies are implemented in this function.
*/
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
- HV_REFERENCE_TSC_PAGE *tsc_ref)
+ struct ms_hyperv_tsc_page *tsc_ref)
{
u64 max_mul;
@@ -941,7 +941,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
u64 gfn;
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
- BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0);
+ BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
return;
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 23f358cb7f49..90070b337c10 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -290,6 +290,34 @@ int vmbus_send_tl_connect_request(const guid_t *shv_guest_servie_id,
EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request);
/*
+ * Set/change the vCPU (@target_vp) the channel (@child_relid) will interrupt.
+ *
+ * CHANNELMSG_MODIFYCHANNEL messages are aynchronous. Also, Hyper-V does not
+ * ACK such messages. IOW we can't know when the host will stop interrupting
+ * the "old" vCPU and start interrupting the "new" vCPU for the given channel.
+ *
+ * The CHANNELMSG_MODIFYCHANNEL message type is supported since VMBus version
+ * VERSION_WIN10_V4_1.
+ */
+int vmbus_send_modifychannel(u32 child_relid, u32 target_vp)
+{
+ struct vmbus_channel_modifychannel conn_msg;
+ int ret;
+
+ memset(&conn_msg, 0, sizeof(conn_msg));
+ conn_msg.header.msgtype = CHANNELMSG_MODIFYCHANNEL;
+ conn_msg.child_relid = child_relid;
+ conn_msg.target_vp = target_vp;
+
+ ret = vmbus_post_msg(&conn_msg, sizeof(conn_msg), true);
+
+ trace_vmbus_send_modifychannel(&conn_msg, ret);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(vmbus_send_modifychannel);
+
+/*
* create_gpadl_header - Creates a gpadl for the specified buffer
*/
static int create_gpadl_header(void *kbuffer, u32 size,
@@ -594,35 +622,31 @@ post_msg_err:
}
EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
-static void reset_channel_cb(void *arg)
-{
- struct vmbus_channel *channel = arg;
-
- channel->onchannel_callback = NULL;
-}
-
void vmbus_reset_channel_cb(struct vmbus_channel *channel)
{
+ unsigned long flags;
+
/*
* vmbus_on_event(), running in the per-channel tasklet, can race
* with vmbus_close_internal() in the case of SMP guest, e.g., when
* the former is accessing channel->inbound.ring_buffer, the latter
* could be freeing the ring_buffer pages, so here we must stop it
* first.
+ *
+ * vmbus_chan_sched() might call the netvsc driver callback function
+ * that ends up scheduling NAPI work that accesses the ring buffer.
+ * At this point, we have to ensure that any such work is completed
+ * and that the channel ring buffer is no longer being accessed, cf.
+ * the calls to napi_disable() in netvsc_device_remove().
*/
tasklet_disable(&channel->callback_event);
- channel->sc_creation_callback = NULL;
+ /* See the inline comments in vmbus_chan_sched(). */
+ spin_lock_irqsave(&channel->sched_lock, flags);
+ channel->onchannel_callback = NULL;
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
- /* Stop the callback asap */
- if (channel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(channel->target_cpu, reset_channel_cb,
- channel, true);
- } else {
- reset_channel_cb(channel);
- put_cpu();
- }
+ channel->sc_creation_callback = NULL;
/* Re-enable tasklet for use on re-open */
tasklet_enable(&channel->callback_event);
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 501c43c5851d..417a95e5094d 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -18,14 +18,15 @@
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
+#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"
-static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);
+static void init_vp_index(struct vmbus_channel *channel);
-static const struct vmbus_device vmbus_devs[] = {
+const struct vmbus_device vmbus_devs[] = {
/* IDE */
{ .dev_type = HV_IDE,
HV_IDE_GUID,
@@ -315,11 +316,11 @@ static struct vmbus_channel *alloc_channel(void)
if (!channel)
return NULL;
+ spin_lock_init(&channel->sched_lock);
spin_lock_init(&channel->lock);
init_completion(&channel->rescind_event);
INIT_LIST_HEAD(&channel->sc_list);
- INIT_LIST_HEAD(&channel->percpu_list);
tasklet_init(&channel->callback_event,
vmbus_on_event, (unsigned long)channel);
@@ -340,23 +341,49 @@ static void free_channel(struct vmbus_channel *channel)
kobject_put(&channel->kobj);
}
-static void percpu_channel_enq(void *arg)
+void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = arg;
- struct hv_per_cpu_context *hv_cpu
- = this_cpu_ptr(hv_context.cpu_context);
-
- list_add_tail_rcu(&channel->percpu_list, &hv_cpu->chan_list);
+ if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+ return;
+ /*
+ * The mapping of the channel's relid is visible from the CPUs that
+ * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
+ * execute:
+ *
+ * (a) In the "normal (i.e., not resuming from hibernation)" path,
+ * the full barrier in smp_store_mb() guarantees that the store
+ * is propagated to all CPUs before the add_channel_work work
+ * is queued. In turn, add_channel_work is queued before the
+ * channel's ring buffer is allocated/initialized and the
+ * OPENCHANNEL message for the channel is sent in vmbus_open().
+ * Hyper-V won't start sending the interrupts for the channel
+ * before the OPENCHANNEL message is acked. The memory barrier
+ * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
+ * that vmbus_chan_sched() must find the channel's relid in
+ * recv_int_page before retrieving the channel pointer from the
+ * array of channels.
+ *
+ * (b) In the "resuming from hibernation" path, the smp_store_mb()
+ * guarantees that the store is propagated to all CPUs before
+ * the VMBus connection is marked as ready for the resume event
+ * (cf. check_ready_for_resume_event()). The interrupt handler
+ * of the VMBus driver and vmbus_chan_sched() can not run before
+ * vmbus_bus_resume() has completed execution (cf. resume_noirq).
+ */
+ smp_store_mb(
+ vmbus_connection.channels[channel->offermsg.child_relid],
+ channel);
}
-static void percpu_channel_deq(void *arg)
+void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
- struct vmbus_channel *channel = arg;
-
- list_del_rcu(&channel->percpu_list);
+ if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
+ return;
+ WRITE_ONCE(
+ vmbus_connection.channels[channel->offermsg.child_relid],
+ NULL);
}
-
static void vmbus_release_relid(u32 relid)
{
struct vmbus_channel_relid_released msg;
@@ -373,39 +400,43 @@ static void vmbus_release_relid(u32 relid)
void hv_process_channel_removal(struct vmbus_channel *channel)
{
- struct vmbus_channel *primary_channel;
unsigned long flags;
- BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));
+ lockdep_assert_held(&vmbus_connection.channel_mutex);
BUG_ON(!channel->rescind);
- if (channel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(channel->target_cpu,
- percpu_channel_deq, channel, true);
- } else {
- percpu_channel_deq(channel);
- put_cpu();
- }
+ /*
+ * hv_process_channel_removal() could find INVALID_RELID only for
+ * hv_sock channels. See the inline comments in vmbus_onoffer().
+ */
+ WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
+ !is_hvsock_channel(channel));
+
+ /*
+ * Upon suspend, an in-use hv_sock channel is removed from the array of
+ * channels and the relid is invalidated. After hibernation, when the
+ * user-space appplication destroys the channel, it's unnecessary and
+ * unsafe to remove the channel from the array of channels. See also
+ * the inline comments before the call of vmbus_release_relid() below.
+ */
+ if (channel->offermsg.child_relid != INVALID_RELID)
+ vmbus_channel_unmap_relid(channel);
if (channel->primary_channel == NULL) {
list_del(&channel->listentry);
-
- primary_channel = channel;
} else {
- primary_channel = channel->primary_channel;
+ struct vmbus_channel *primary_channel = channel->primary_channel;
spin_lock_irqsave(&primary_channel->lock, flags);
list_del(&channel->sc_list);
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
/*
- * We need to free the bit for init_vp_index() to work in the case
- * of sub-channel, when we reload drivers like hv_netvsc.
+ * If this is a "perf" channel, updates the hv_numa_map[] masks so that
+ * init_vp_index() can (re-)use the CPU.
*/
- if (channel->affinity_policy == HV_LOCALIZED)
- cpumask_clear_cpu(channel->target_cpu,
- &primary_channel->alloced_cpus_in_node);
+ if (hv_is_perf_channel(channel))
+ hv_clear_alloced_cpu(channel->target_cpu);
/*
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
@@ -440,23 +471,8 @@ static void vmbus_add_channel_work(struct work_struct *work)
container_of(work, struct vmbus_channel, add_channel_work);
struct vmbus_channel *primary_channel = newchannel->primary_channel;
unsigned long flags;
- u16 dev_type;
int ret;
- dev_type = hv_get_dev_type(newchannel);
-
- init_vp_index(newchannel, dev_type);
-
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_enq,
- newchannel, true);
- } else {
- percpu_channel_enq(newchannel);
- put_cpu();
- }
-
/*
* This state is used to indicate a successful open
* so that when we do close the channel normally, we
@@ -488,7 +504,7 @@ static void vmbus_add_channel_work(struct work_struct *work)
if (!newchannel->device_obj)
goto err_deq_chan;
- newchannel->device_obj->device_id = dev_type;
+ newchannel->device_obj->device_id = newchannel->device_id;
/*
* Add the new device to the bus. This will kick off device-driver
* binding which eventually invokes the device driver's AddDevice()
@@ -523,17 +539,10 @@ err_deq_chan:
spin_unlock_irqrestore(&primary_channel->lock, flags);
}
- mutex_unlock(&vmbus_connection.channel_mutex);
+ /* vmbus_process_offer() has mapped the channel. */
+ vmbus_channel_unmap_relid(newchannel);
- if (newchannel->target_cpu != get_cpu()) {
- put_cpu();
- smp_call_function_single(newchannel->target_cpu,
- percpu_channel_deq,
- newchannel, true);
- } else {
- percpu_channel_deq(newchannel);
- put_cpu();
- }
+ mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_release_relid(newchannel->offermsg.child_relid);
@@ -551,8 +560,35 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
unsigned long flags;
bool fnew = true;
+ /*
+ * Synchronize vmbus_process_offer() and CPU hotplugging:
+ *
+ * CPU1 CPU2
+ *
+ * [vmbus_process_offer()] [Hot removal of the CPU]
+ *
+ * CPU_READ_LOCK CPUS_WRITE_LOCK
+ * LOAD cpu_online_mask SEARCH chn_list
+ * STORE target_cpu LOAD target_cpu
+ * INSERT chn_list STORE cpu_online_mask
+ * CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
+ *
+ * Forbids: CPU1's LOAD from *not* seing CPU2's STORE &&
+ * CPU2's SEARCH from *not* seeing CPU1's INSERT
+ *
+ * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
+ * CPU2's LOAD from *not* seing CPU1's STORE
+ */
+ cpus_read_lock();
+
+ /*
+ * Serializes the modifications of the chn_list list as well as
+ * the accesses to next_numa_node_id in init_vp_index().
+ */
mutex_lock(&vmbus_connection.channel_mutex);
+ init_vp_index(newchannel);
+
/* Remember the channels that should be cleaned up upon suspend. */
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
@@ -599,7 +635,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
spin_unlock_irqrestore(&channel->lock, flags);
}