diff options
29 files changed, 823 insertions, 335 deletions
diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index 2b1f4a1e9272..8e735b5e56bd 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -123,44 +123,6 @@ static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, return ret; } -static void alpha_noop_free_coherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - struct dma_attrs *attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t alpha_noop_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - struct dma_attrs *attrs) -{ - return page_to_pa(page) + offset; -} - -static int alpha_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, struct dma_attrs *attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - void *va; - - BUG_ON(!sg_page(sg)); - va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -static int alpha_noop_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - static int alpha_noop_supported(struct device *dev, u64 mask) { return mask < 0x00ffffffUL ? 0 : 1; @@ -168,10 +130,10 @@ static int alpha_noop_supported(struct device *dev, u64 mask) struct dma_map_ops alpha_noop_ops = { .alloc = alpha_noop_alloc_coherent, - .free = alpha_noop_free_coherent, - .map_page = alpha_noop_map_page, - .map_sg = alpha_noop_map_sg, - .mapping_error = alpha_noop_mapping_error, + .free = dma_noop_free_coherent, + .map_page = dma_noop_map_page, + .map_sg = dma_noop_map_sg, + .mapping_error = dma_noop_mapping_error, .dma_supported = alpha_noop_supported, }; diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 7e3e8a8338d6..b9df8d11d7a9 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -124,6 +124,7 @@ config S390 select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK + select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FTRACE_MCOUNT_RECORD @@ -617,10 +618,6 @@ config HAS_IOMEM config IOMMU_HELPER def_bool PCI -config HAS_DMA - def_bool PCI - select HAVE_DMA_API_DEBUG - config NEED_SG_DMA_LENGTH def_bool PCI diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h index d8f9872b0e2d..4a9f35e0973f 100644 --- a/arch/s390/include/asm/device.h +++ b/arch/s390/include/asm/device.h @@ -3,5 +3,9 @@ * * This file is released under the GPLv2 */ -#include <asm-generic/device.h> +struct dev_archdata { + struct dma_map_ops *dma_ops; +}; +struct pdev_archdata { +}; diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index e64bfcb9702f..3249b7464889 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -11,11 +11,13 @@ #define DMA_ERROR_CODE (~(dma_addr_t) 0x0) -extern struct dma_map_ops s390_dma_ops; +extern struct dma_map_ops s390_pci_dma_ops; static inline struct dma_map_ops *get_dma_ops(struct device *dev) { - return &s390_dma_ops; + if (dev && dev->archdata.dma_ops) + return dev->archdata.dma_ops; + return &dma_noop_ops; } static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 9fd59a7cfcd3..871af75c69c2 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -641,6 +641,7 @@ int pcibios_add_device(struct pci_dev *pdev) int i; pdev->dev.groups = zpci_attr_groups; + pdev->dev.archdata.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_BAR_COUNT; i++) { diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index a06ce8037cec..e595e89eac65 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -547,7 +547,7 @@ static int __init dma_debug_do_init(void) } fs_initcall(dma_debug_do_init); -struct dma_map_ops s390_dma_ops = { +struct dma_map_ops s390_pci_dma_ops = { .alloc = s390_dma_alloc, .free = s390_dma_free, .map_sg = s390_dma_map_sg, @@ -558,7 +558,7 @@ struct dma_map_ops s390_dma_ops = { .is_phys = 0, /* dma_supported is unconditionally true without a callback */ }; -EXPORT_SYMBOL_GPL(s390_dma_ops); +EXPORT_SYMBOL_GPL(s390_pci_dma_ops); static int __init s390_iommu_setup(char *str) { diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ca35495a5be..28cff0d23d82 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -477,8 +477,13 @@ static int virtblk_get_cache_mode(struct virtio_device *vdev) err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, struct virtio_blk_config, wce, &writeback); + + /* + * If WCE is not configurable and flush is not available, + * assume no writeback cache is in use. + */ if (err) - writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE); + writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); return writeback; } @@ -833,14 +838,14 @@ static const struct virtio_device_id id_table[] = { static unsigned int features_legacy[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, - VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, } ; static unsigned int features[] = { VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, - VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, + VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, VIRTIO_BLK_F_MQ, }; diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fb0eae42bf39..49d84e540343 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -260,7 +260,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, p = page_address(page) + offset; /* copy small packet so we can reuse these pages for small data */ - skb = netdev_alloc_skb_ip_align(vi->dev, GOOD_COPY_LEN); + skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN); if (unlikely(!skb)) return NULL; diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index bf2d1300a957..8688ad4c825f 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -342,13 +342,14 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, ccw->count = sizeof(*thinint_area); ccw->cda = (__u32)(unsigned long) thinint_area; } else { + /* payload is the address of the indicators */ indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL); if (!indicatorp) return; *indicatorp = 0; ccw->cmd_code = CCW_CMD_SET_IND; - ccw->count = sizeof(vcdev->indicators); + ccw->count = sizeof(&vcdev->indicators); ccw->cda = (__u32)(unsigned long) indicatorp; } /* Deregister indicators from host. */ @@ -656,7 +657,10 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, } } ret = -ENOMEM; - /* We need a data area under 2G to communicate. */ + /* + * We need a data area under 2G to communicate. Our payload is + * the address of the indicators. + */ indicatorp = kmalloc(sizeof(&vcdev->indicators), GFP_DMA | GFP_KERNEL); if (!indicatorp) goto out; @@ -672,7 +676,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, vcdev->indicators = 0; ccw->cmd_code = CCW_CMD_SET_IND; ccw->flags = 0; - ccw->count = sizeof(vcdev->indicators); + ccw->count = sizeof(&vcdev->indicators); ccw->cda = (__u32)(unsigned long) indicatorp; ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); if (ret) @@ -683,7 +687,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, vcdev->indicators2 = 0; ccw->cmd_code = CCW_CMD_SET_CONF_IND; ccw->flags = 0; - ccw->count = sizeof(vcdev->indicators2); + ccw->count = sizeof(&vcdev->indicators2); ccw->cda = (__u32)(unsigned long) indicatorp; ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND); if (ret) @@ -945,8 +949,7 @@ static struct virtio_config_ops virtio_ccw_config_ops = { static void virtio_ccw_release_dev(struct device *_d) { - struct virtio_device *dev = container_of(_d, struct virtio_device, - dev); + struct virtio_device *dev = dev_to_virtio(_d); struct virtio_ccw_device *vcdev = to_vc_device(dev); kfree(vcdev->status); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9eda69e40678..f744eeb3e2b4 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -287,6 +287,43 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) rcu_read_unlock_bh(); } +static inline unsigned long busy_clock(void) +{ + return local_clock() >> 10; +} + +static bool vhost_can_busy_poll(struct vhost_dev *dev, + unsigned long endtime) +{ + return likely(!need_resched()) && + likely(!time_after(busy_clock(), endtime)) && + likely(!signal_pending(current)) && + !vhost_has_work(dev); +} + +static int vhost_net_tx_get_vq_desc(struct vhost_net *net, + struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num) +{ + unsigned long uninitialized_var(endtime); + int r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + + if (r == vq->num && vq->busyloop_timeout) { + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + while (vhost_can_busy_poll(vq->dev, endtime) && + vhost_vq_avail_empty(vq->dev, vq)) + cpu_relax_lowlatency(); + preempt_enable(); + r = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov), + out_num, in_num, NULL, NULL); + } + + return r; +} + /* Expects to be always run from workqueue - which acts as * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) @@ -331,10 +368,9 @@ static void handle_tx(struct vhost_net *net) % UIO_MAXIOV == nvq->done_idx)) break; - head = vhost_get_vq_desc(vq, vq->iov, - ARRAY_SIZE(vq->iov), - &out, &in, - NULL, NULL); + head = vhost_net_tx_get_vq_desc(net, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in); /* On error, stop handling until the next kick. */ if (unlikely(head < 0)) break; @@ -435,6 +471,38 @@ static int peek_head_len(struct sock *sk) return len; } +static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk) +{ + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; + unsigned long uninitialized_var(endtime); + int len = peek_head_len(sk); + + if (!len && vq->busyloop_timeout) { + /* Both tx vq and rx socket were polled here */ + mutex_lock(&vq->mutex); + vhost_disable_notify(&net->dev, vq); + + preempt_disable(); + endtime = busy_clock() + vq->busyloop_timeout; + + while (vhost_can_busy_poll(&net->dev, endtime) && + skb_queue_empty(&sk->sk_receive_queue) && + vhost_vq_avail_empty(&net->dev, vq)) + cpu_relax_lowlatency(); + + preempt_enable(); + + if (vhost_enable_notify(&net->dev, vq)) + vhost_poll_queue(&vq->poll); + mutex_unlock(&vq->mutex); + + len = peek_head_len(sk); + } + + return len; +} + /* This is a multi-buffer version of vhost_get_desc, that works if * vq has read descriptors only. * @vq - the relevant virtqueue @@ -553,7 +621,7 @@ static void handle_rx(struct vhost_net *net) vq->log : NULL; mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF); - while ((sock_len = peek_head_len(sock->sk))) { + while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; headcount = get_rx_bufs(vq, vq->heads, vhost_len, @@ -917,7 +985,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) vhost_net_disable_vq(n, vq); vq->private_data = sock; - r = vhost_init_used(vq); + r = vhost_vq_init_access(vq); if (r) goto err_used; r = vhost_net_enable_vq(n, vq); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 29cfc57d496e..f898686cdd93 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1274,7 +1274,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, vq = &vs->vqs[i].vq; mutex_lock(&vq->mutex); vq->private_data = vs_tpg; - vhost_init_used(vq); + vhost_vq_init_access(vq); mutex_unlock(&vq->mutex); } ret = 0; diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index f2882ac98726..388eec4e1a90 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -196,7 +196,7 @@ static long vhost_test_run(struct vhost_test *n, int test) oldpriv = vq->private_data; vq->private_data = priv; - r = vhost_init_used(&n->vqs[index]); + r = vhost_vq_init_access(&n->vqs[index]); mutex_unlock(&vq->mutex); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 236553e81027..669fef1e2bb6 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -43,11 +43,21 @@ enum { #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) #ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { vq->user_be = !virtio_legacy_is_little_endian(); } +static void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) +{ + vq->user_be = true; +} + +static void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) +{ + vq->user_be = false; +} + static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) { struct vhost_vring_state s; @@ -62,7 +72,10 @@ static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) s.num != VHOST_VRING_BIG_ENDIAN) return -EINVAL; - vq->user_be = s.num; + if (s.num == VHOST_VRING_BIG_ENDIAN) + vhost_enable_cross_endian_big(vq); + else + vhost_enable_cross_endian_little(vq); return 0; } @@ -91,7 +104,7 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; } #else -static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq) +static void vhost_disable_cross_endian(struct vhost_virtqueue *vq) { } @@ -113,6 +126,11 @@ static void vhost_init_is_le(struct vhost_virtqueue *vq) } #endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ +static void vhost_reset_is_le(struct vhost_virtqueue *vq) +{ + vq->is_le = virtio_legacy_is_little_endian(); +} + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -245,6 +263,13 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) } EXPORT_SYMBOL_GPL(vhost_work_queue); +/* A lockless hint for busy polling code to exit the loop */ +bool vhost_has_work(struct vhost_dev *dev) +{ + return !list_empty(&dev->work_list); +} +EXPORT_SYMBOL_GPL(vhost_has_work); + void vhost_poll_queue(struct vhost_poll *poll) { vhost_work_queue(poll->dev, &poll->work); @@ -276,8 +301,9 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call = NULL; vq->log_ctx = NULL; vq->memory = NULL; - vq->is_le = virtio_legacy_is_little_endian(); - vhost_vq_reset_user_be(vq); + vhost_reset_is_le(vq); + vhost_disable_cross_endian(vq); + vq->busyloop_timeout = 0; } static int vhost_worker(void *data) @@ -912,6 +938,19 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) case VHOST_GET_VRING_ENDIAN: r = vhost_get_vring_endian(vq, idx, argp); break; + case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: + if (copy_from_user(&s, argp, sizeof(s))) { + r = -EFAULT; + break; + } + vq->busyloop_timeout = s.num; + break; + case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: + s.index = idx; + s.num = vq->busyloop_timeout; + if (copy_to_user(argp, &s, sizeof(s))) + r = -EFAULT; + break; default: r = -ENOIOCTLCMD; } @@ -1152,14 +1191,14 @@ static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) return 0; } -int vhost_init_used(struct vhost_virtqueue *vq) +int vhost_vq_init_access(struct vhost_virtqueue *vq) { __virtio16 last_used_idx; int r; bool is_le = vq->is_le; if (!vq->private_data) { - vq->is_le = virtio_legacy_is_little_endian(); + vhost_reset_is_le(vq); return 0; } @@ -1182,7 +1221,7 @@ err: vq->is_le = is_le; return r; } -EXPORT_SYMBOL_GPL(vhost_init_used); +EXPORT_SYMBOL_GPL(vhost_vq_init_access); static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, struct iovec iov[], int iov_size) @@ -1633,6 +1672,20 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } EXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); +/* return true if we're sure that avaiable ring is empty */ +bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __virtio16 avail_idx; + int r; + + r = __get_user(avail_idx, &vq->avail->idx); + if (r) + return false; + + return vhost16_to_cpu(vq, avail_idx) == vq->avail_idx; +} +EXPORT_SYMBOL_GPL(vhost_vq_avail_empty); + /* OK, now we need to know about added descriptors. */ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index d3f767448a72..d36d8beb3351 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -37,6 +37,7 @@ struct vhost_poll { void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn); void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work); +bool vhost_has_work(struct vhost_dev *dev); void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, unsigned long mask, struct vhost_dev *dev); @@ -114,6 +115,7 @@ struct vhost_virtqueue { /* Ring endianness requested by userspace for cross-endian support. */ bool user_be; #endif + u32 busyloop_timeout; }; struct vhost_dev { @@ -148,7 +150,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *, struct vhost_log *log, unsigned int *log_num); void vhost_discard_vq_desc(struct vhost_virtqueue *, int n); -int vhost_init_used(struct vhost_virtqueue *); +int vhost_vq_init_access(struct vhost_virtqueue *); int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); int vhost_add_used_n(struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); @@ -158,6 +160,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_vq_avail_empty(struct vhost_dev *, struct vhost_virtqueue *); bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index cab9f3f63a38..77590320d44c 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -60,7 +60,7 @@ config VIRTIO_INPUT config VIRTIO_MMIO tristate "Platform bus driver for memory mapped virtio devices" - depends on HAS_IOMEM + depends on HAS_IOMEM && HAS_DMA select VIRTIO ---help--- This drivers provides support for memory mapped virtio diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index f2b77dea8d3c..7b6d74f0c72f 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -22,8 +22,7 @@ #include <linux/virtio.h> #include <linux/virtio_balloon.h> #include <linux/swap.h> -#include <linux/kthread.h> -#include <linux/freezer.h> +#include <linux/workqueue.h> #include <linux/delay.h> #include <linux/slab.h> #include <linux/module.h> @@ -50,11 +49,13 @@ struct virtio_balloon { struct virtio_device *vdev; struct virtqueue *inflate_vq, *deflate_vq, *stats_vq; - /* Where the ballooning thread waits for config to change. */ - wait_queue_head_t config_change; + /* The balloon servicing is delegated to a freezable workqueue. */ + struct work_struct update_balloon_stats_work; + struct work_struct update_balloon_size_work; - /* The thread servicing the balloon. */ - struct task_struct *thread; + /* Prevent updating balloon when it is being canceled. */ + spinlock_t stop_update_lock; + bool stop_update; /* Waiting for host to ack the pages we released. */ wait_queue_head_t acked; @@ -77,7 +78,6 @@ struct virtio_balloon { u32 pfns[VIRTIO_BALLOON_ARRAY_PFNS_MAX]; /* Memory statistics */ - int need_stats_update; struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; /* To register callback in oom notifier call chain */ @@ -124,6 +124,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq) /* When host has read buffer, this completes via balloon_ack */ wait_event(vb->acked, virtqueue_get_buf(vq, &len)); + } static void set_page_pfns(u32 pfns[], struct page *page) @@ -136,9 +137,10 @@ static void set_page_pfns(u32 pfns[], struct page *page) pfns[i] = page_to_balloon_pfn(page) + i; } -static void fill_balloon(struct virtio_balloon *vb, size_t num) +static unsigned fill_balloon(struct virtio_balloon *vb, size_t num) { struct balloon_dev_info *vb_dev_info = &vb->vb_dev_info; + unsigned num_allocated_pages; /* We can only do one array worth at a time. */ num = min(num, ARRAY_SIZE(vb->pfns)); @@ -163,10 +165,13 @@ static void fill_balloon(struct virtio_balloon *vb, size_t num) adjust_managed_page_count(page, -1); } + num_allocated_pages = vb->num_pfns; /* Did we get any? */ if (vb->num_pfns != 0) tell_host(vb, vb->inflate_vq); mutex_unlock(&vb->balloon_lock); + + return num_allocated_pages; } static void release_pages_balloon(struct virtio_balloon *vb) @@ -257,14 +262,17 @@ static void update_balloon_stats(struct virtio_balloon *vb) * with a single buffer. From that point forward, all conversations consist of * a hypervisor request (a call to this function) which directs us to refill * the virtqueue with a fresh stats buffer. Since stats collection can sleep, - * we notify our kthread which does the actual work via stats_handle_request(). + * we delegate the job to a freezable workqueue that will do the actual work via + * stats_handle_request(). */ static void stats_request(struct virtqueue *vq) { struct virtio_balloon *vb = vq->vdev->priv; - vb->need_stats_update = 1; - wake_up(&vb->config_change); + spin_lock(&vb->stop_update_lock); + if (!vb->stop_update) + queue_work(system_freezable_wq, &vb->update_balloon_stats_work); + spin_unlock(&vb->stop_update_lock); } static void stats_handle_request(struct virtio_balloon *vb) @@ -273,7 +281,6 @@ static void stats_handle_request(struct virtio_balloon *vb) struct scatterlist sg; unsigned int len; - vb->need_stats_update = 0; update_balloon_stats(vb); vq = vb->stats_vq; @@ -287,8 +294,12 @@ static void stats_handle_request(struct virtio_balloon *vb) static void virtballoon_changed(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; + unsigned long flags; - wake_up(&vb->config_change); + spin_lock_irqsave(&vb->stop_update_lock, flags); + if (!vb->stop_update) + queue_work(system_freezable_wq, &vb->update_balloon_size_work); + spin_unlock_irqrestore(&vb->stop_update_lock, flags); } static inline s64 towards_target(struct virtio_balloon *vb) @@ -351,43 +362,32 @@ static int virtballoon_oom_notify(struct notifier_block *self, return NOTIFY_OK; } -static int balloon(void *_vballoon) +static void update_balloon_stats_func(struct work_struct *work) { - struct virtio_balloon *vb = _vballoon; - DEFINE_WAIT_FUNC(wait, woken_wake_function); - - set_freezable(); - while (!kthread_should_stop()) { - s64 diff; - - try_to_freeze(); - - add_wait_queue(&vb->config_change, &wait); - for (;;) { - if ((diff = towards_target(vb)) != 0 || - vb->need_stats_update || - kthread_should_stop() || - freezing(current)) - break; - wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); - } - remove_wait_queue(&vb->config_change, &wait); + struct virtio_balloon *vb; - if (vb->need_stats_update) - stats_handle_request(vb); - if (diff > 0) - fill_balloon(vb, diff); - else if (diff < 0) - leak_balloon(vb, -diff); - update_balloon_size(vb); + vb = container_of(work, struct virtio_balloon, + update_balloon_stats_work); + stats_handle_request(vb); +} - /* - * For large balloon changes, we could spend a lot of time - * and always have work to do. Be nice if preempt disabled. - */ - cond_resched(); - } - return 0; +static void update_balloon_size_func(struct work_struct *work) +{ + struct virtio_balloon *vb; + s64 diff; + + vb = container_of(work, struct virtio_balloon, + update_balloon_size_work); + diff = towards_target(vb); + + if (diff > 0) + diff -= fill_balloon(vb, diff); + else if (diff < 0) + diff += leak_balloon(vb, -diff); + update_balloon_size(vb); + + if (diff) + queue_work(system_freezable_wq, work); } static int init_vqs(struct virtio_balloon *vb) @@ -505,12 +505,14 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out; } + INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); + INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); + spin_lock_init(&vb->stop_update_lock); + vb->stop_update = false; vb->num_pages = 0; mutex_init(&vb->balloon_lock); - init_waitqueue_head(&vb->config_change); init_waitqueue_head(&vb->acked); vb->vdev = vdev; - vb->need_stats_update = 0; balloon_devinfo_init(&vb->vb_dev_info); #ifdef CONFIG_BALLOON_COMPACTION @@ -529,16 +531,8 @@ static int virtballoon_probe(struct virtio_device *vdev) virtio_device_ready(vdev); - vb->thread = kthread_run(balloon, vb, "vballoon"); - if (IS_ERR(vb->thread)) { - err = PTR_ERR(vb->thread); - goto out_del_vqs; - } - return 0; -out_del_vqs: - unregister_oom_notifier(&vb->nb); out_oom_notify: vdev->config->del_vqs(vdev); out_free_vb: @@ -565,7 +559,13 @@ static void virtballoon_remove(struct virtio_device *vdev) struct virtio_balloon *vb = vdev->priv; unregister_oom_notifier(&vb->nb); - kthread_stop(vb->thread); + + spin_lock_irq(&vb->stop_update_lock); + vb->stop_update = true; + spin_unlock_irq(&vb->stop_update_lock); + cancel_work_sync(&vb->update_balloon_size_work); + cancel_work_sync(&vb->update_balloon_stats_work); + remove_common(vb); kfree(vb); } @@ -576,10 +576,9 @@ static int virtballoon_freeze(struct virtio_device *vdev) struct virtio_balloon *vb = vdev->priv; /* - * The kthread is already frozen by the PM core before this + * The workqueue is already frozen by the PM core before this * function is called. */ - remove_common(vb); return 0; } @@ -595,7 +594,8 @@ static int virtballoon_restore(struct virtio_device *vdev) virtio_device_ready(vdev); - fill_ballo |