From 1b807e1011af46a595ba46c75ad5e20ad7177af7 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Mon, 21 Dec 2015 15:12:20 -0800 Subject: Drivers: hv: vmbus: Cleanup vmbus_set_event() Cleanup vmbus_set_event() by inlining the hypercall to post the event and since the return value of vmbus_set_event() is not checked, make it void. As part of this cleanup, get rid of the function hv_signal_event() as it is only called from vmbus_set_event(). Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/connection.c | 4 ++-- drivers/hv/hv.c | 16 ---------------- drivers/hv/hyperv_vmbus.h | 4 +--- 3 files changed, 3 insertions(+), 21 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 3dc5a9c7fad6..4a320e60641a 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -474,7 +474,7 @@ int vmbus_post_msg(void *buffer, size_t buflen) /* * vmbus_set_event - Send an event notification to the parent */ -int vmbus_set_event(struct vmbus_channel *channel) +void vmbus_set_event(struct vmbus_channel *channel) { u32 child_relid = channel->offermsg.child_relid; @@ -485,5 +485,5 @@ int vmbus_set_event(struct vmbus_channel *channel) (child_relid >> 5)); } - return hv_signal_event(channel->sig_event); + hv_do_hypercall(HVCALL_SIGNAL_EVENT, channel->sig_event, NULL); } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 11bca51ef5ff..1c677d0f16d6 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -337,22 +337,6 @@ int hv_post_message(union hv_connection_id connection_id, return status & 0xFFFF; } - -/* - * hv_signal_event - - * Signal an event on the specified connection using the hypervisor event IPC. - * - * This involves a hypercall. 
- */ -int hv_signal_event(void *con_id) -{ - u64 status; - - status = hv_do_hypercall(HVCALL_SIGNAL_EVENT, con_id, NULL); - - return status & 0xFFFF; -} - static int hv_ce_set_next_event(unsigned long delta, struct clock_event_device *evt) { diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 4ebc796b4f33..ac7aa303c37d 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -501,8 +501,6 @@ extern int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, void *payload, size_t payload_size); -extern int hv_signal_event(void *con_id); - extern int hv_synic_alloc(void); extern void hv_synic_free(void); @@ -650,7 +648,7 @@ void vmbus_disconnect(void); int vmbus_post_msg(void *buffer, size_t buflen); -int vmbus_set_event(struct vmbus_channel *channel); +void vmbus_set_event(struct vmbus_channel *channel); void vmbus_on_event(unsigned long data); -- cgit v1.2.3 From 7047f17d70fc0599563d30d0791692cb5fe42ae6 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 25 Dec 2015 20:00:30 -0800 Subject: Drivers: hv: vmbus: Add vendor and device atttributes Add vendor and device attributes to VMBUS devices. These will be used by Hyper-V tools as well as user-level RDMA libraries that will use the vendor/device tuple to discover the RDMA device. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 166 ++++++++++++++++++++++++++++++++++------------ drivers/hv/vmbus_drv.c | 21 ++++++ 2 files changed, 144 insertions(+), 43 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1c1ad47042c5..107d72f9834d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -32,8 +32,122 @@ #include "hyperv_vmbus.h" -static void init_vp_index(struct vmbus_channel *channel, - const uuid_le *type_guid); +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type); + +static const struct vmbus_device vmbus_devs[] = { + /* IDE */ + { .dev_type = HV_IDE, + HV_IDE_GUID, + .perf_device = true, + }, + + /* SCSI */ + { .dev_type = HV_SCSI, + HV_SCSI_GUID, + .perf_device = true, + }, + + /* Fibre Channel */ + { .dev_type = HV_FC, + HV_SYNTHFC_GUID, + .perf_device = true, + }, + + /* Synthetic NIC */ + { .dev_type = HV_NIC, + HV_NIC_GUID, + .perf_device = true, + }, + + /* Network Direct */ + { .dev_type = HV_ND, + HV_ND_GUID, + .perf_device = true, + }, + + /* PCIE */ + { .dev_type = HV_PCIE, + HV_PCIE_GUID, + .perf_device = true, + }, + + /* Synthetic Frame Buffer */ + { .dev_type = HV_FB, + HV_SYNTHVID_GUID, + .perf_device = false, + }, + + /* Synthetic Keyboard */ + { .dev_type = HV_KBD, + HV_KBD_GUID, + .perf_device = false, + }, + + /* Synthetic MOUSE */ + { .dev_type = HV_MOUSE, + HV_MOUSE_GUID, + .perf_device = false, + }, + + /* KVP */ + { .dev_type = HV_KVP, + HV_KVP_GUID, + .perf_device = false, + }, + + /* Time Synch */ + { .dev_type = HV_TS, + HV_TS_GUID, + .perf_device = false, + }, + + /* Heartbeat */ + { .dev_type = HV_HB, + HV_HEART_BEAT_GUID, + .perf_device = false, + }, + + /* Shutdown */ + { .dev_type = HV_SHUTDOWN, + HV_SHUTDOWN_GUID, + .perf_device = false, + }, + + /* File copy */ + { .dev_type = HV_FCOPY, + HV_FCOPY_GUID, + .perf_device = false, + }, + + /* Backup */ + { .dev_type = HV_BACKUP, + 
HV_VSS_GUID, + .perf_device = false, + }, + + /* Dynamic Memory */ + { .dev_type = HV_DM, + HV_DM_GUID, + .perf_device = false, + }, + + /* Unknown GUID */ + { .dev_type = HV_UNKOWN, + .perf_device = false, + }, +}; + +static u16 hv_get_dev_type(const uuid_le *guid) +{ + u16 i; + + for (i = HV_IDE; i < HV_UNKOWN; i++) { + if (!uuid_le_cmp(*guid, vmbus_devs[i].guid)) + return i; + } + pr_info("Unknown GUID: %pUl\n", guid); + return i; +} /** * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message @@ -251,6 +365,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) struct vmbus_channel *channel; bool fnew = true; unsigned long flags; + u16 dev_type; /* Make sure this is a new offer */ mutex_lock(&vmbus_connection.channel_mutex); @@ -288,7 +403,9 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) goto err_free_chan; } - init_vp_index(newchannel, &newchannel->offermsg.offer.if_type); + dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type); + + init_vp_index(newchannel, dev_type); if (newchannel->target_cpu != get_cpu()) { put_cpu(); @@ -325,6 +442,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) if (!newchannel->device_obj) goto err_deq_chan; + newchannel->device_obj->device_id = dev_type; /* * Add the new device to the bus. This will kick off device-driver * binding which eventually invokes the device driver's AddDevice() @@ -358,37 +476,6 @@ err_free_chan: free_channel(newchannel); } -enum { - IDE = 0, - SCSI, - FC, - NIC, - ND_NIC, - PCIE, - MAX_PERF_CHN, -}; - -/* - * This is an array of device_ids (device types) that are performance critical. - * We attempt to distribute the interrupt load for these devices across - * all available CPUs. 
- */ -static const struct hv_vmbus_device_id hp_devs[] = { - /* IDE */ - { HV_IDE_GUID, }, - /* Storage - SCSI */ - { HV_SCSI_GUID, }, - /* Storage - FC */ - { HV_SYNTHFC_GUID, }, - /* Network */ - { HV_NIC_GUID, }, - /* NetworkDirect Guest RDMA */ - { HV_ND_GUID, }, - /* PCI Express Pass Through */ - { HV_PCIE_GUID, }, -}; - - /* * We use this state to statically distribute the channel interrupt load. */ @@ -405,22 +492,15 @@ static int next_numa_node_id; * For pre-win8 hosts or non-performance critical channels we assign the * first CPU in the first NUMA node. */ -static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid) +static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) { u32 cur_cpu; - int i; - bool perf_chn = false; + bool perf_chn = vmbus_devs[dev_type].perf_device; struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; struct cpumask *alloced_mask; - for (i = IDE; i < MAX_PERF_CHN; i++) { - if (!uuid_le_cmp(*type_guid, hp_devs[i].guid)) { - perf_chn = true; - break; - } - } if ((vmbus_proto_version == VERSION_WS2008) || (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) { /* diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 328e4c3808e0..3668a95778ec 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -477,6 +477,24 @@ static ssize_t channel_vp_mapping_show(struct device *dev, } static DEVICE_ATTR_RO(channel_vp_mapping); +static ssize_t vendor_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->vendor_id); +} +static DEVICE_ATTR_RO(vendor); + +static ssize_t device_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + return sprintf(buf, "0x%x\n", hv_dev->device_id); +} +static DEVICE_ATTR_RO(device); + /* Set up per device 
attributes in /sys/bus/vmbus/devices/ */ static struct attribute *vmbus_attrs[] = { &dev_attr_id.attr, @@ -502,6 +520,8 @@ static struct attribute *vmbus_attrs[] = { &dev_attr_in_read_bytes_avail.attr, &dev_attr_in_write_bytes_avail.attr, &dev_attr_channel_vp_mapping.attr, + &dev_attr_vendor.attr, + &dev_attr_device.attr, NULL, }; ATTRIBUTE_GROUPS(vmbus); @@ -957,6 +977,7 @@ struct hv_device *vmbus_device_create(const uuid_le *type, memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le)); memcpy(&child_device_obj->dev_instance, instance, sizeof(uuid_le)); + child_device_obj->vendor_id = 0x1414; /* MSFT vendor ID */ return child_device_obj; -- cgit v1.2.3 From 79fd8e706637a5c7c41f9498fe0fbfb437abfdc8 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Jan 2016 22:29:34 -0800 Subject: Drivers: hv: vmbus: avoid infinite loop in init_vp_index() When we pick a CPU to use for a new subchannel we try to find a non-used one on the appropriate NUMA node; we keep track of them with the primary->alloced_cpus_in_node mask. Under normal circumstances we don't run out of available CPUs but it is possible when we don't initialize some CPUs in Linux, e.g. when we boot with 'nr_cpus=' limitation. Avoid the infinite loop in init_vp_index() by checking that we still have non-used CPUs in the alloced_cpus_in_node mask and resetting it in case we don't. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 107d72f9834d..af1d82eb8ecf 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -549,6 +549,17 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) cpumask_of_node(primary->numa_node)); cur_cpu = -1; + + /* + * Normally Hyper-V host doesn't create more subchannels than there + * are VCPUs on the node but it is possible when not all present VCPUs + * on the node are initialized by guest. Clear the alloced_cpus_in_node + * to start over. + */ + if (cpumask_equal(&primary->alloced_cpus_in_node, + cpumask_of_node(primary->numa_node))) + cpumask_clear(&primary->alloced_cpus_in_node); + while (true) { cur_cpu = cpumask_next(cur_cpu, &available_mask); if (cur_cpu >= nr_cpu_ids) { -- cgit v1.2.3 From 415719160de3fae3bb9cbc617664649919cd00d0 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Jan 2016 22:29:35 -0800 Subject: Drivers: hv: vmbus: avoid scheduling in interrupt context in vmbus_initiate_unload() We have to call vmbus_initiate_unload() on crash to make kdump work but the crash can also be happening in interrupt context (e.g. SysRq + c results in such) where we can't schedule or the following will happen: [ 314.905786] bad: scheduling from the idle thread! Just skipping the wait (and even adding some random wait here) won't help: to make the host-side magic work we're supposed to receive CHANNELMSG_UNLOAD_RESPONSE (and actually confirm the fact that we received it) but we can't use the interrupt-based path (vmbus_isr() -> vmbus_on_msg_dpc()). Implement a simple busy wait ignoring all the other messages and use it if we're in an interrupt context. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index af1d82eb8ecf..d6c611457601 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "hyperv_vmbus.h" @@ -589,6 +590,40 @@ static void init_vp_index(struct vmbus_channel *channel, u16 dev_type) channel->target_vp = hv_context.vp_index[cur_cpu]; } +static void vmbus_wait_for_unload(void) +{ + int cpu = smp_processor_id(); + void *page_addr = hv_context.synic_message_page[cpu]; + struct hv_message *msg = (struct hv_message *)page_addr + + VMBUS_MESSAGE_SINT; + struct vmbus_channel_message_header *hdr; + bool unloaded = false; + + while (1) { + if (msg->header.message_type == HVMSG_NONE) { + mdelay(10); + continue; + } + + hdr = (struct vmbus_channel_message_header *)msg->u.payload; + if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) + unloaded = true; + + msg->header.message_type = HVMSG_NONE; + /* + * header.message_type needs to be written before we do + * wrmsrl() below. + */ + mb(); + + if (msg->header.message_flags.msg_pending) + wrmsrl(HV_X64_MSR_EOM, 0); + + if (unloaded) + break; + } +} + /* * vmbus_unload_response - Handler for the unload response. */ @@ -614,7 +649,14 @@ void vmbus_initiate_unload(void) hdr.msgtype = CHANNELMSG_UNLOAD; vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header)); - wait_for_completion(&vmbus_connection.unload_event); + /* + * vmbus_initiate_unload() is also called on crash and the crash can be + * happening in an interrupt context, where scheduling is impossible. 
+ */ + if (!in_interrupt()) + wait_for_completion(&vmbus_connection.unload_event); + else + vmbus_wait_for_unload(); } /* -- cgit v1.2.3 From 3ccb4fd8f492f99aece21acc1bd6142275f26236 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 27 Jan 2016 22:29:36 -0800 Subject: Drivers: hv: vmbus: don't manipulate with clocksources on crash clocksource_change_rating() involves mutex usage and can't be called in interrupt context. It also makes sense to avoid doing redundant work on crash. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 1c677d0f16d6..ccb335f57c88 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -295,8 +295,14 @@ void hv_cleanup(void) * Cleanup the TSC page based CS. */ if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { - clocksource_change_rating(&hyperv_cs_tsc, 10); - clocksource_unregister(&hyperv_cs_tsc); + /* + * Crash can happen in an interrupt context and unregistering + * a clocksource is impossible and redundant in this case. + */ + if (!oops_in_progress) { + clocksource_change_rating(&hyperv_cs_tsc, 10); + clocksource_unregister(&hyperv_cs_tsc); + } hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); -- cgit v1.2.3 From 5f363bc38f810d238d1e8b19998625ddec3b8138 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:39 -0800 Subject: Drivers: hv: vmbus: vmbus_sendpacket_ctl: hvsock: avoid unnecessary signaling When the hvsock channel's outbound ringbuffer is full (i.e., hv_ringbuffer_write() returns -EAGAIN), we should avoid the unnecessary signaling the host. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 1161d68a1863..3f0453302146 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -659,6 +659,9 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, * If we cannot write to the ring-buffer; signal the host * even if we may not have written anything. This is a rare * enough condition that it should not matter. + * NOTE: in this case, the hvsock channel is an exception, because + * it looks the host side's hvsock implementation has a throttling + * mechanism which can hurt the performance otherwise. */ if (channel->signal_policy) @@ -666,7 +669,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, else kick_q = true; - if (((ret == 0) && kick_q && signal) || (ret)) + if (((ret == 0) && kick_q && signal) || + (ret && !is_hvsock_channel(channel))) vmbus_setevent(channel); return ret; -- cgit v1.2.3 From 5c23a1a5c60b0f472cfa61cd7d8279f8aaeb5b64 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:40 -0800 Subject: Drivers: hv: vmbus: define a new VMBus message type for hvsock A function to send the type of message is also added. The coming net/hvsock driver will use this function to proactively request the host to offer a VMBus channel for a new hvsock connection. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 15 +++++++++++++++ drivers/hv/channel_mgmt.c | 4 ++++ 2 files changed, 19 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 3f0453302146..fcab234796ef 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -219,6 +219,21 @@ error0: } EXPORT_SYMBOL_GPL(vmbus_open); +/* Used for Hyper-V Socket: a guest client's connect() to the host */ +int vmbus_send_tl_connect_request(const uuid_le *shv_guest_servie_id, + const uuid_le *shv_host_servie_id) +{ + struct vmbus_channel_tl_connect_request conn_msg; + + memset(&conn_msg, 0, sizeof(conn_msg)); + conn_msg.header.msgtype = CHANNELMSG_TL_CONNECT_REQUEST; + conn_msg.guest_endpoint_id = *shv_guest_servie_id; + conn_msg.host_service_id = *shv_host_servie_id; + + return vmbus_post_msg(&conn_msg, sizeof(conn_msg)); +} +EXPORT_SYMBOL_GPL(vmbus_send_tl_connect_request); + /* * create_gpadl_header - Creates a gpadl for the specified buffer */ diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index d6c611457601..60ca25b93b4c 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -958,6 +958,10 @@ struct vmbus_channel_message_table_entry {CHANNELMSG_VERSION_RESPONSE, 1, vmbus_onversion_response}, {CHANNELMSG_UNLOAD, 0, NULL}, {CHANNELMSG_UNLOAD_RESPONSE, 1, vmbus_unload_response}, + {CHANNELMSG_18, 0, NULL}, + {CHANNELMSG_19, 0, NULL}, + {CHANNELMSG_20, 0, NULL}, + {CHANNELMSG_TL_CONNECT_REQUEST, 0, NULL}, }; /* -- cgit v1.2.3 From 8981da320a11217589aa3c50f9e891bcdef07ece Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:41 -0800 Subject: Drivers: hv: vmbus: add a hvsock flag in struct hv_driver Only the coming hv_sock driver has a "true" value for this flag. We treat the hvsock offers/channels as special VMBus devices. 
Since the hv_sock driver handles all the hvsock offers/channels, we need to tweak vmbus_match() for hv_sock driver, so we introduce this flag. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 3668a95778ec..063e5f53ca78 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -582,6 +582,10 @@ static int vmbus_match(struct device *device, struct device_driver *driver) struct hv_driver *drv = drv_to_hv_drv(driver); struct hv_device *hv_dev = device_to_hv_device(device); + /* The hv_sock driver handles all hv_sock offers. */ + if (is_hvsock_channel(hv_dev->channel)) + return drv->hvsock; + if (hv_vmbus_get_id(drv->id_table, &hv_dev->dev_type)) return 1; -- cgit v1.2.3 From 499e8401a515d04daa986b995da710d2b9737764 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:42 -0800 Subject: Drivers: hv: vmbus: add a per-channel rescind callback This will be used by the coming hv_sock driver. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 60ca25b93b4c..76864c98a110 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -741,6 +741,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) spin_unlock_irqrestore(&channel->lock, flags); if (channel->device_obj) { + if (channel->chn_rescind_callback) { + channel->chn_rescind_callback(channel); + return; + } /* * We will have to unregister this device from the * driver core. 
@@ -1110,3 +1114,10 @@ bool vmbus_are_subchannels_present(struct vmbus_channel *primary) return ret; } EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present); + +void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel, + void (*chn_rescind_cb)(struct vmbus_channel *)) +{ + channel->chn_rescind_callback = chn_rescind_cb; +} +EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback); -- cgit v1.2.3 From 85d9aa705184a4504d0330017e3956fcdae8a9d6 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 27 Jan 2016 22:29:43 -0800 Subject: Drivers: hv: vmbus: add an API vmbus_hvsock_device_unregister() The hvsock driver needs this API to release all the resources related to the channel. Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 33 ++++++++++++++++++++++++++++----- drivers/hv/connection.c | 4 ++-- 2 files changed, 30 insertions(+), 7 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 76864c98a110..cf311be88cb4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -310,6 +310,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) vmbus_release_relid(relid); BUG_ON(!channel->rescind); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); if (channel->target_cpu != get_cpu()) { put_cpu(); @@ -321,9 +322,7 @@ void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) } if (channel->primary_channel == NULL) { - mutex_lock(&vmbus_connection.channel_mutex); list_del(&channel->listentry); - mutex_unlock(&vmbus_connection.channel_mutex); primary_channel = channel; } else { @@ -367,6 +366,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) bool fnew = true; unsigned long flags; u16 dev_type; + int ret; /* Make sure this is a new offer */ mutex_lock(&vmbus_connection.channel_mutex); @@ -449,7 +449,11 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) 
* binding which eventually invokes the device driver's AddDevice() * method. */ - if (vmbus_device_register(newchannel->device_obj) != 0) { + mutex_lock(&vmbus_connection.channel_mutex); + ret = vmbus_device_register(newchannel->device_obj); + mutex_unlock(&vmbus_connection.channel_mutex); + + if (ret != 0) { pr_err("unable to add child device object (relid %d)\n", newchannel->offermsg.child_relid); kfree(newchannel->device_obj); @@ -725,6 +729,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) struct device *dev; rescind = (struct vmbus_channel_rescind_offer *)hdr; + + mutex_lock(&vmbus_connection.channel_mutex); channel = relid2channel(rescind->child_relid); if (channel == NULL) { @@ -733,7 +739,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) * vmbus_process_offer(), we have already invoked * vmbus_release_relid() on error. */ - return; + goto out; } spin_lock_irqsave(&channel->lock, flags); @@ -743,7 +749,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) if (channel->device_obj) { if (channel->chn_rescind_callback) { channel->chn_rescind_callback(channel); - return; + goto out; } /* * We will have to unregister this device from the @@ -758,8 +764,25 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) hv_process_channel_removal(channel, channel->offermsg.child_relid); } + +out: + mutex_unlock(&vmbus_connection.channel_mutex); } +void vmbus_hvsock_device_unregister(struct vmbus_channel *channel) +{ + mutex_lock(&vmbus_connection.channel_mutex); + + BUG_ON(!is_hvsock_channel(channel)); + + channel->rescind = true; + vmbus_device_unregister(channel->device_obj); + + mutex_unlock(&vmbus_connection.channel_mutex); +} +EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister); + + /* * vmbus_onoffers_delivered - * This is invoked when all offers have been delivered. 
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 4a320e60641a..fa86b2cb28b8 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -288,7 +288,8 @@ struct vmbus_channel *relid2channel(u32 relid) struct list_head *cur, *tmp; struct vmbus_channel *cur_sc; - mutex_lock(&vmbus_connection.channel_mutex); + BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); + list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { if (channel->offermsg.child_relid == relid) { found_channel = channel; @@ -307,7 +308,6 @@ struct vmbus_channel *relid2channel(u32 relid) } } } - mutex_unlock(&vmbus_connection.channel_mutex); return found_channel; } -- cgit v1.2.3 From 3eba9a77d5fc2cee486a16fff435686f024f61cf Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 27 Jan 2016 22:29:44 -0800 Subject: Drivers: hv: vmbus: Eliminate the spin lock on the read path The function hv_ringbuffer_read() is always called on a pre-assigned CPU. Each channel is bound to a specific CPU and this function is always called on the CPU the channel is bound to. There is no need to acquire the spin lock; get rid of this overhead. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/ring_buffer.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index b53702ce692f..1145f3b8e4e0 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -388,7 +388,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, u32 bytes_avail_toread; u32 next_read_location = 0; u64 prev_indices = 0; - unsigned long flags; struct vmpacket_descriptor desc; u32 offset; u32 packetlen; @@ -397,7 +396,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, if (buflen <= 0) return -EINVAL; - spin_lock_irqsave(&inring_info->ring_lock, flags); *buffer_actual_len = 0; *requestid = 0; @@ -412,7 +410,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, * No error is set when there is even no header, drivers are * supposed to analyze buffer_actual_len. */ - goto out_unlock; + return ret; } next_read_location = hv_get_next_read_location(inring_info); @@ -425,15 +423,11 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, *buffer_actual_len = packetlen; *requestid = desc.trans_id; - if (bytes_avail_toread < packetlen + offset) { - ret = -EAGAIN; - goto out_unlock; - } + if (bytes_avail_toread < packetlen + offset) + return -EAGAIN; - if (packetlen > buflen) { - ret = -ENOBUFS; - goto out_unlock; - } + if (packetlen > buflen) + return -ENOBUFS; next_read_location = hv_get_next_readlocation_withoffset(inring_info, offset); @@ -460,7 +454,5 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info); -out_unlock: - spin_unlock_irqrestore(&inring_info->ring_lock, flags); return ret; } -- cgit v1.2.3 From fe760e4d64fe5c17c39e86c410d41f6587ee88bc Mon Sep 17 00:00:00 2001 From: "K. Y. 
Srinivasan" Date: Wed, 27 Jan 2016 22:29:45 -0800 Subject: Drivers: hv: vmbus: Give control over how the ring access is serialized On the channel send side, many of the VMBUS device drivers explicitly serialize access to the outgoing ring buffer. Give more control to the VMBUS device drivers in terms of how to serialize access to the outgoing ring buffer. The default behavior will be to acquire the ring lock to preserve the current behavior. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 15 +++++++++++---- drivers/hv/channel_mgmt.c | 1 + drivers/hv/hyperv_vmbus.h | 2 +- drivers/hv/ring_buffer.c | 13 ++++++++----- 4 files changed, 21 insertions(+), 10 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index fcab234796ef..56dd261f7142 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -639,6 +639,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u64 aligned_data = 0; int ret; bool signal = false; + bool lock = channel->acquire_ring_lock; int num_vecs = ((bufferlen != 0) ? 
3 : 1); @@ -658,7 +659,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_len = (packetlen_aligned - packetlen); ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, - &signal); + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -738,6 +739,7 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; if (pagecount > MAX_PAGE_BUFFER_COUNT) return -EINVAL; @@ -774,7 +776,8 @@ int vmbus_sendpacket_pagebuffer_ctl(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); /* * Signalling the host is conditional on many factors: @@ -837,6 +840,7 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; packetlen = desc_size + bufferlen; packetlen_aligned = ALIGN(packetlen, sizeof(u64)); @@ -856,7 +860,8 @@ int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); @@ -881,6 +886,7 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, struct kvec bufferlist[3]; u64 aligned_data = 0; bool signal = false; + bool lock = channel->acquire_ring_lock; u32 pfncount = NUM_PAGES_SPANNED(multi_pagebuffer->offset, multi_pagebuffer->len); @@ -919,7 +925,8 @@ int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, 
bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, + &signal, lock); if (ret == 0 && signal) vmbus_setevent(channel); diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index cf311be88cb4..b40f429aaa13 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -259,6 +259,7 @@ static struct vmbus_channel *alloc_channel(void) return NULL; channel->id = atomic_inc_return(&chan_num); + channel->acquire_ring_lock = true; spin_lock_init(&channel->inbound_lock); spin_lock_init(&channel->lock); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index ac7aa303c37d..b9ea7f59036b 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -529,7 +529,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, struct kvec *kv_list, - u32 kv_count, bool *signal); + u32 kv_count, bool *signal, bool lock); int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 buflen, u32 *buffer_actual_len, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 1145f3b8e4e0..5613e2b5cff7 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -314,7 +314,7 @@ void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) /* Write to the ring buffer. 
*/ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, - struct kvec *kv_list, u32 kv_count, bool *signal) + struct kvec *kv_list, u32 kv_count, bool *signal, bool lock) { int i = 0; u32 bytes_avail_towrite; @@ -324,14 +324,15 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, u32 next_write_location; u32 old_write; u64 prev_indices = 0; - unsigned long flags; + unsigned long flags = 0; for (i = 0; i < kv_count; i++) totalbytes_towrite += kv_list[i].iov_len; totalbytes_towrite += sizeof(u64); - spin_lock_irqsave(&outring_info->ring_lock, flags); + if (lock) + spin_lock_irqsave(&outring_info->ring_lock, flags); hv_get_ringbuffer_availbytes(outring_info, &bytes_avail_toread, @@ -343,7 +344,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, * is empty since the read index == write index. */ if (bytes_avail_towrite <= totalbytes_towrite) { - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); return -EAGAIN; } @@ -374,7 +376,8 @@ int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, hv_set_next_write_location(outring_info, next_write_location); - spin_unlock_irqrestore(&outring_info->ring_lock, flags); + if (lock) + spin_unlock_irqrestore(&outring_info->ring_lock, flags); *signal = hv_need_to_signal(old_write, outring_info); return 0; -- cgit v1.2.3 From 7be3e169444d2c625f15a0b6639252b98d1f226a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 26 Feb 2016 15:13:15 -0800 Subject: Drivers: hv: vmbus: don't lose HVMSG_TIMER_EXPIRED messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We must handle HVMSG_TIMER_EXPIRED messages in the interrupt context and we offload all the rest to vmbus_on_msg_dpc() tasklet. This function loops to see if there are new messages pending. 
If we ever see an HVMSG_TIMER_EXPIRED message there, we're going to lose it as we can't handle it from there. Avoid looping in vmbus_on_msg_dpc(), we're OK with handling one message per interrupt. Signed-off-by: Vitaly Kuznetsov Reviewed-by: Radim Krčmář Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 68 ++++++++++++++++++++++++-------------------------- 1 file changed, 33 insertions(+), 35 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 063e5f53ca78..30ea8ad902e2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -740,51 +740,49 @@ static void vmbus_on_msg_dpc(unsigned long data) struct vmbus_channel_message_table_entry *entry; struct onmessage_work_context *ctx; - while (1) { - if (msg->header.message_type == HVMSG_NONE) - /* no msg */ - break; + if (msg->header.message_type == HVMSG_NONE) + /* no msg */ + return; - hdr = (struct vmbus_channel_message_header *)msg->u.payload; + hdr = (struct vmbus_channel_message_header *)msg->u.payload; - if (hdr->msgtype >= CHANNELMSG_COUNT) { - WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); - goto msg_handled; - } + if (hdr->msgtype >= CHANNELMSG_COUNT) { + WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype); + goto msg_handled; + } - entry = &channel_message_table[hdr->msgtype]; - if (entry->handler_type == VMHT_BLOCKING) { - ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); - if (ctx == NULL) - continue; + entry = &channel_message_table[hdr->msgtype]; + if (entry->handler_type == VMHT_BLOCKING) { + ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC); + if (ctx == NULL) + return; - INIT_WORK(&ctx->work, vmbus_onmessage_work); - memcpy(&ctx->msg, msg, sizeof(*msg)); + INIT_WORK(&ctx->work, vmbus_onmessage_work); + memcpy(&ctx->msg, msg, sizeof(*msg)); - queue_work(vmbus_connection.work_queue, &ctx->work); - } else - entry->message_handler(hdr); + queue_work(vmbus_connection.work_queue, &ctx->work); + } else + 
entry->message_handler(hdr); msg_handled: - msg->header.message_type = HVMSG_NONE; + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + if (msg->header.message_flags.msg_pending) { /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + wrmsrl(HV_X64_MSR_EOM, 0); } } -- cgit v1.2.3 From 75ff3a8a9168df750b5bd0589e897a6c0517a9f1 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 26 Feb 2016 15:13:16 -0800 Subject: Drivers: hv: vmbus: avoid wait_for_completion() on crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit wait_for_completion() may sleep, it enables interrupts and this is something we really want to avoid on crashes because interrupt handlers can cause other crashes. Switch to the recently introduced vmbus_wait_for_unload() doing busy wait instead. Reported-by: Radim Krcmar Signed-off-by: Vitaly Kuznetsov Reviewed-by: Radim Krčmář Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 4 ++-- drivers/hv/connection.c | 2 +- drivers/hv/hyperv_vmbus.h | 2 +- drivers/hv/vmbus_drv.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index b40f429aaa13..f70e35278b94 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -641,7 +641,7 @@ static void vmbus_unload_response(struct vmbus_channel_message_header *hdr) complete(&vmbus_connection.unload_event); } -void vmbus_initiate_unload(void) +void vmbus_initiate_unload(bool crash) { struct vmbus_channel_message_header hdr; @@ -658,7 +658,7 @@ void vmbus_initiate_unload(void) * vmbus_initiate_unload() is also called on crash and the crash can be * happening in an interrupt context, where scheduling is impossible. */ - if (!in_interrupt()) + if (!crash) wait_for_completion(&vmbus_connection.unload_event); else vmbus_wait_for_unload(); diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index fa86b2cb28b8..3b6dc0017269 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -236,7 +236,7 @@ void vmbus_disconnect(void) /* * First send the unload request to the host. 
*/ - vmbus_initiate_unload(); + vmbus_initiate_unload(false); if (vmbus_connection.work_queue) { drain_workqueue(vmbus_connection.work_queue); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index b9ea7f59036b..b0299da9c2db 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -663,7 +663,7 @@ void hv_vss_onchannelcallback(void *); int hv_fcopy_init(struct hv_util_service *); void hv_fcopy_deinit(void); void hv_fcopy_onchannelcallback(void *); -void vmbus_initiate_unload(void); +void vmbus_initiate_unload(bool crash); static inline void hv_poll_channel(struct vmbus_channel *channel, void (*cb)(void *)) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 30ea8ad902e2..c8f1671944d5 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1291,7 +1291,7 @@ static void hv_kexec_handler(void) int cpu; hv_synic_clockevents_cleanup(); - vmbus_initiate_unload(); + vmbus_initiate_unload(false); for_each_online_cpu(cpu) smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); hv_cleanup(); @@ -1299,7 +1299,7 @@ static void hv_kexec_handler(void) static void hv_crash_handler(struct pt_regs *regs) { - vmbus_initiate_unload(); + vmbus_initiate_unload(true); /* * In crash handler we can't schedule synic cleanup for all CPUs, * doing the cleanup for current CPU only. This should be sufficient -- cgit v1.2.3 From 0f70b66975ce4331e9002b792d5aa6787a110181 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 26 Feb 2016 15:13:17 -0800 Subject: Drivers: hv: vmbus: remove code duplication in message handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have 3 functions dealing with messages and they all implement the same logic to finalize reads, move it to vmbus_signal_eom(). Suggested-by: Radim Krcmar Signed-off-by: Vitaly Kuznetsov Reviewed-by: Radim Krčmář Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 10 +--------- drivers/hv/hyperv_vmbus.h | 24 ++++++++++++++++++++++++ drivers/hv/vmbus_drv.c | 40 ++-------------------------------------- 3 files changed, 27 insertions(+), 47 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index f70e35278b94..73a17be1f340 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -614,15 +614,7 @@ static void vmbus_wait_for_unload(void) if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE) unloaded = true; - msg->header.message_type = HVMSG_NONE; - /* - * header.message_type needs to be written before we do - * wrmsrl() below. - */ - mb(); - - if (msg->header.message_flags.msg_pending) - wrmsrl(HV_X64_MSR_EOM, 0); + vmbus_signal_eom(msg); if (unloaded) break; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index b0299da9c2db..cada56a2daa0 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -624,6 +624,30 @@ struct vmbus_channel_message_table_entry { extern struct vmbus_channel_message_table_entry channel_message_table[CHANNELMSG_COUNT]; +/* Free the message slot and signal end-of-message if required */ +static inline void vmbus_signal_eom(struct hv_message *msg) +{ + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. 
Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + /* General vmbus interface */ struct hv_device *vmbus_device_create(const uuid_le *type, diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index c8f1671944d5..6cd12f108a32 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -709,25 +709,7 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu) if (dev->event_handler) dev->event_handler(dev); - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + vmbus_signal_eom(msg); } static void vmbus_on_msg_dpc(unsigned long data) @@ -765,25 +747,7 @@ static void vmbus_on_msg_dpc(unsigned long data) entry->message_handler(hdr); msg_handled: - msg->header.message_type = HVMSG_NONE; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. 
Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } + vmbus_signal_eom(msg); } static void vmbus_isr(void) -- cgit v1.2.3 From d452ab7b4c65dfcaee88a0d6866eeeb98a3d1884 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Fri, 26 Feb 2016 15:13:18 -0800 Subject: Drivers: hv: vmbus: avoid unneeded compiler optimizations in vmbus_wait_for_unload() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The message header is modified by the hypervisor and we read it in a loop, so we need to prevent compilers from optimizing accesses. There are no such optimizations at this moment, this is just future-proofing. Suggested-by: Radim Krcmar Signed-off-by: Vitaly Kuznetsov Reviewed-by: Radim Krčmář Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 73a17be1f340..38b682bab85a 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -605,7 +605,7 @@ static void vmbus_wait_for_unload(void) bool unloaded = false; while (1) { - if (msg->header.message_type == HVMSG_NONE) { + if (READ_ONCE(msg->header.message_type) == HVMSG_NONE) { mdelay(10); continue; } -- cgit v1.2.3 From b9830d120cbe155863399f25eaef6aa8353e767f Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 26 Feb 2016 15:13:19 -0800 Subject: Drivers: hv: util: Pass the channel information during the init call Pass the channel information to the util drivers that need to defer reading the channel while they are processing a request. 
This would address the following issue reported by Vitaly: Commit 3cace4a61610 ("Drivers: hv: utils: run polling callback always in interrupt context") removed direct *_transaction.state = HVUTIL_READY assignments from *_handle_handshake() functions introducing the following race: if a userspace daemon connects before we get first non-negotiation request from the server hv_poll_channel() won't set transaction state to HVUTIL_READY as (!channel) condition will fail, we set it to non-NULL on the first real request from the server. Signed-off-by: K. Y. Srinivasan Reported-by: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 2 +- drivers/hv/hv_kvp.c | 2 +- drivers/hv/hv_snapshot.c | 2 +- drivers/hv/hv_util.c | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index c37a71e13de0..23c70799ad8a 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -251,7 +251,6 @@ void hv_fcopy_onchannelcallback(void *context) */ fcopy_transaction.recv_len = recvlen; - fcopy_transaction.recv_channel = channel; fcopy_transaction.recv_req_id = requestid; fcopy_transaction.fcopy_msg = fcopy_msg; @@ -317,6 +316,7 @@ static void fcopy_on_reset(void) int hv_fcopy_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + fcopy_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index d4ab81bcd515..9b9b370fe22a 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -639,7 +639,6 @@ void hv_kvp_onchannelcallback(void *context) */ kvp_transaction.recv_len = recvlen; - kvp_transaction.recv_channel = channel; kvp_transaction.recv_req_id = requestid; kvp_transaction.kvp_msg = kvp_msg; @@ -688,6 +687,7 @@ int hv_kvp_init(struct hv_util_service *srv) { recv_buffer = srv->recv_buffer; + kvp_transaction.recv_channel = srv->channel; /* * When this driver loads, 
the user level daemon that diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 67def4a831c8..3fba14e88f03 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -263,7 +263,6 @@ void hv_vss_onchannelcallback(void *context) */ vss_transaction.recv_len = recvlen; - vss_transaction.recv_channel = channel; vss_transaction.recv_req_id = requestid; vss_transaction.msg = (struct hv_vss_msg *)vss_msg; @@ -337,6 +336,7 @@ hv_vss_init(struct hv_util_service *srv) return -ENOTSUPP; } recv_buffer = srv->recv_buffer; + vss_transaction.recv_channel = srv->channel; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 7994ec2e4151..d5acaa2d8e61 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -322,6 +322,7 @@ static int util_probe(struct hv_device *dev, srv->recv_buffer = kmalloc(PAGE_SIZE * 4, GFP_KERNEL); if (!srv->recv_buffer) return -ENOMEM; + srv->channel = dev->channel; if (srv->util_init) { ret = srv->util_init(srv); if (ret) { -- cgit v1.2.3 From e66853b09017a788dc384dadce9323396dae3293 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Fri, 26 Feb 2016 15:13:20 -0800 Subject: Drivers: hv: utils: Remove util transport handler from list if registration fails If util transport fails to initialize for any reason, the list of transport handlers may become corrupted due to freeing the transport handler without removing it from the list. Fix this by cleaning it up from the list. Signed-off-by: Alex Ng Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_utils_transport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index 4f42c0e20c20..9a9983fa4531 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -310,6 +310,9 @@ struct hvutil_transport *hvutil_transport_init(const char *name, return hvt; err_free_hvt: + spin_lock(&hvt_list_lock); + list_del(&hvt->list); + spin_unlock(&hvt_list_lock); kfree(hvt); return NULL; } -- cgit v1.2.3 From d81274aae61c0a045cd0f34191c51fa64ba58bc4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 26 Feb 2016 15:13:21 -0800 Subject: Drivers: hv: vmbus: Support handling messages on multiple CPUs Starting with Windows 2012 R2, message interrupts can be delivered on any VCPU in the guest. Support this functionality. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv.c | 10 ++++++++++ drivers/hv/hyperv_vmbus.h | 4 +++- drivers/hv/vmbus_drv.c | 10 ++++------ 3 files changed, 17 insertions(+), 7 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ccb335f57c88..a1c086ba3b9a 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -204,6 +204,8 @@ int hv_init(void) sizeof(int) * NR_CPUS); memset(hv_context.event_dpc, 0, sizeof(void *) * NR_CPUS); + memset(hv_context.msg_dpc, 0, + sizeof(void *) * NR_CPUS); memset(hv_context.clk_evt, 0, sizeof(void *) * NR_CPUS); @@ -415,6 +417,13 @@ int hv_synic_alloc(void) } tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu); + hv_context.msg_dpc[cpu] = kmalloc(size, GFP_ATOMIC); + if (hv_context.msg_dpc[cpu] == NULL) { + pr_err("Unable to allocate event dpc\n"); + goto err; + } + tasklet_init(hv_context.msg_dpc[cpu], vmbus_on_msg_dpc, cpu); + hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC); if (hv_context.clk_evt[cpu] == NULL) { pr_err("Unable to allocate clock event 
device\n"); @@ -456,6 +465,7 @@ err: static void hv_synic_free_cpu(int cpu) { kfree(hv_context.event_dpc[cpu]); + kfree(hv_context.msg_dpc[cpu]); kfree(hv_context.clk_evt[cpu]); if (hv_context.synic_event_page[cpu]) free_page((unsigned long)hv_context.synic_event_page[cpu]); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cada56a2daa0..a64b17661d17 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -449,10 +449,11 @@ struct hv_context { u32 vp_index[NR_CPUS]; /* * Starting with win8, we can take channel interrupts on any CPU; - * we will manage the tasklet that handles events on a per CPU + * we will manage the tasklet that handles events messages on a per CPU * basis. */ struct tasklet_struct *event_dpc[NR_CPUS]; + struct tasklet_struct *msg_dpc[NR_CPUS]; /* * To optimize the mapping of relid to channel, maintain * per-cpu list of the channels based on their CPU affinity. @@ -675,6 +676,7 @@ int vmbus_post_msg(void *buffer, size_t buflen); void vmbus_set_event(struct vmbus_channel *channel); void vmbus_on_event(unsigned long data); +void vmbus_on_msg_dpc(unsigned long data); int hv_kvp_init(struct hv_util_service *); void hv_kvp_deinit(void); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 6cd12f108a32..64713ff47e36 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -45,7 +45,6 @@ static struct acpi_device *hv_acpi_dev; -static struct tasklet_struct msg_dpc; static struct completion probe_event; @@ -712,7 +711,7 @@ static void hv_process_timer_expiration(struct hv_message *msg, int cpu) vmbus_signal_eom(msg); } -static void vmbus_on_msg_dpc(unsigned long data) +void vmbus_on_msg_dpc(unsigned long data) { int cpu = smp_processor_id(); void *page_addr = hv_context.synic_message_page[cpu]; @@ -800,7 +799,7 @@ static void vmbus_isr(void) if (msg->header.message_type == HVMSG_TIMER_EXPIRED) hv_process_timer_expiration(msg, cpu); else - tasklet_schedule(&msg_dpc); + 
tasklet_schedule(hv_context.msg_dpc[cpu]); } } @@ -824,8 +823,6 @@ static int vmbus_bus_init(void) return ret; } - tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0); - ret = bus_register(&hv_bus); if (ret) goto err_cleanup; @@ -1321,7 +1318,8 @@ static void __exit vmbus_exit(void) hv_synic_clockevents_cleanup(); vmbus_disconnect(); hv_remove_vmbus_irq(); - tasklet_kill(&msg_dpc); + for_each_online_cpu(cpu) + tasklet_kill(hv_context.msg_dpc[cpu]); vmbus_free_channels(); if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { unregister_die_notifier(&hyperv_die_block); -- cgit v1.2.3 From 7268644734f6a300353a4c4ff8bf3e013ba80f89 Mon Sep 17 00:00:00 2001 From: Alex Ng Date: Fri, 26 Feb 2016 15:13:22 -0800 Subject: Drivers: hv: vmbus: Support kexec on ws2012 r2 and above WS2012 R2 and above hosts can support kexec in that they can support reconnecting to the host (as would be needed in the kexec path) on any CPU. Enable this. Pre ws2012 r2 hosts don't have this ability and consequently cannot support kexec. Signed-off-by: Alex Ng Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/connection.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 3b6dc0017269..d02f1373dd98 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -88,8 +88,16 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, * This has been the behavior pre-win8. This is not * perf issue and having all channel messages delivered on CPU 0 * would be ok. + * For post win8 hosts, we support receiving channel messagges on + * all the CPUs. This is needed for kexec to work correctly where + * the CPU attempting to connect may not be CPU 0. 
*/ - msg->target_vcpu = 0; + if (version >= VERSION_WIN8_1) { + msg->target_vcpu = hv_context.vp_index[get_cpu()]; + put_cpu(); + } else { + msg->target_vcpu = 0; + } /* * Add to list before we send the request since we may -- cgit v1.2.3