summaryrefslogtreecommitdiffstats
path: root/drivers/vhost
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/Kconfig4
-rw-r--r--drivers/vhost/net.c31
-rw-r--r--drivers/vhost/scsi.c16
-rw-r--r--drivers/vhost/test.c14
-rw-r--r--drivers/vhost/vdpa.c121
-rw-r--r--drivers/vhost/vhost.c115
-rw-r--r--drivers/vhost/vhost.h35
-rw-r--r--drivers/vhost/vringh.c11
-rw-r--r--drivers/vhost/vsock.c37
9 files changed, 286 insertions, 98 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 362b832f5338..2c75d164b827 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -3,6 +3,8 @@ config VHOST_IOTLB
tristate
help
Generic IOTLB implementation for vhost and vringh.
+ This option is selected by any driver which needs to support
+ an IOMMU in software.
config VHOST_RING
tristate
@@ -63,7 +65,7 @@ config VHOST_VDPA
tristate "Vhost driver for vDPA-based backend"
depends on EVENTFD
select VHOST
- select VDPA
+ depends on VDPA
help
This kernel module can be loaded in host kernel to accelerate
guest virtio devices with the vDPA-based backends.
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 87469d67ede8..e992decfec53 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -424,7 +424,7 @@ static void vhost_net_disable_vq(struct vhost_net *n,
struct vhost_net_virtqueue *nvq =
container_of(vq, struct vhost_net_virtqueue, vq);
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
return;
vhost_poll_stop(poll);
}
@@ -437,7 +437,7 @@ static int vhost_net_enable_vq(struct vhost_net *n,
struct vhost_poll *poll = n->poll + (nvq - n->vqs);
struct socket *sock;
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
return 0;
@@ -524,7 +524,7 @@ static void vhost_net_busy_poll(struct vhost_net *net,
return;
vhost_disable_notify(&net->dev, vq);
- sock = rvq->private_data;
+ sock = vhost_vq_get_backend(rvq);
busyloop_timeout = poll_rx ? rvq->busyloop_timeout:
tvq->busyloop_timeout;
@@ -570,8 +570,10 @@ static int vhost_net_tx_get_vq_desc(struct vhost_net *net,
if (r == tvq->num && tvq->busyloop_timeout) {
/* Flush batched packets first */
- if (!vhost_sock_zcopy(tvq->private_data))
- vhost_tx_batch(net, tnvq, tvq->private_data, msghdr);
+ if (!vhost_sock_zcopy(vhost_vq_get_backend(tvq)))
+ vhost_tx_batch(net, tnvq,
+ vhost_vq_get_backend(tvq),
+ msghdr);
vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, false);
@@ -685,7 +687,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
struct vhost_virtqueue *vq = &nvq->vq;
struct vhost_net *net = container_of(vq->dev, struct vhost_net,
dev);
- struct socket *sock = vq->private_data;
+ struct socket *sock = vhost_vq_get_backend(vq);
struct page_frag *alloc_frag = &net->page_frag;
struct virtio_net_hdr *gso;
struct xdp_buff *xdp = &nvq->xdp[nvq->batched_xdp];
@@ -745,6 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
xdp->data = buf + pad;
xdp->data_end = xdp->data + len;
hdr->buflen = buflen;
+ xdp->frame_sz = buflen;
--net->refcnt_bias;
alloc_frag->offset += buflen;
@@ -952,7 +955,7 @@ static void handle_tx(struct vhost_net *net)
struct socket *sock;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_TX);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
goto out;
@@ -1121,7 +1124,7 @@ static void handle_rx(struct vhost_net *net)
int recv_pkts = 0;
mutex_lock_nested(&vq->mutex, VHOST_NET_VQ_RX);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
if (!sock)
goto out;
@@ -1324,7 +1327,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
UIO_MAXIOV + VHOST_NET_BATCH,
- VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT,
+ VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true,
NULL);
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
@@ -1345,9 +1348,9 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
container_of(vq, struct vhost_net_virtqueue, vq);
mutex_lock(&vq->mutex);
- sock = vq->private_data;
+ sock = vhost_vq_get_backend(vq);
vhost_net_disable_vq(n, vq);
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
vhost_net_buf_unproduce(nvq);
nvq->rx_ring = NULL;
mutex_unlock(&vq->mutex);
@@ -1521,7 +1524,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
/* start polling new socket */
- oldsock = vq->private_data;
+ oldsock = vhost_vq_get_backend(vq);
if (sock != oldsock) {
ubufs = vhost_net_ubuf_alloc(vq,
sock && vhost_sock_zcopy(sock));
@@ -1531,7 +1534,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
vhost_net_disable_vq(n, vq);
- vq->private_data = sock;
+ vhost_vq_set_backend(vq, sock);
vhost_net_buf_unproduce(nvq);
r = vhost_vq_init_access(vq);
if (r)
@@ -1568,7 +1571,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
return 0;
err_used:
- vq->private_data = oldsock;
+ vhost_vq_set_backend(vq, oldsock);
vhost_net_enable_vq(n, vq);
if (ubufs)
vhost_net_ubuf_put_wait_and_free(ubufs);
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index 3146ab880400..6fb4d7ecfa19 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -452,7 +452,7 @@ vhost_scsi_do_evt_work(struct vhost_scsi *vs, struct vhost_scsi_evt *evt)
unsigned out, in;
int head, ret;
- if (!vq->private_data) {
+ if (!vhost_vq_get_backend(vq)) {
vs->vs_events_missed = true;
return;
}
@@ -892,7 +892,7 @@ vhost_scsi_get_req(struct vhost_virtqueue *vq, struct vhost_scsi_ctx *vc,
} else {
struct vhost_scsi_tpg **vs_tpg, *tpg;
- vs_tpg = vq->private_data; /* validated at handler entry */
+ vs_tpg = vhost_vq_get_backend(vq); /* validated at handler entry */
tpg = READ_ONCE(vs_tpg[*vc->target]);
if (unlikely(!tpg)) {
@@ -929,7 +929,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
* We can handle the vq only after the endpoint is setup by calling the
* VHOST_SCSI_SET_ENDPOINT ioctl.
*/
- vs_tpg = vq->private_data;
+ vs_tpg = vhost_vq_get_backend(vq);
if (!vs_tpg)
goto out;
@@ -1184,7 +1184,7 @@ vhost_scsi_ctl_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
* We can handle the vq only after the endpoint is setup by calling the
* VHOST_SCSI_SET_ENDPOINT ioctl.
*/
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
goto out;
memset(&vc, 0, sizeof(vc));
@@ -1322,7 +1322,7 @@ static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
mutex_lock(&vq->mutex);
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
goto out;
if (vs->vs_events_missed)
@@ -1460,7 +1460,7 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs,
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
- vq->private_data = vs_tpg;
+ vhost_vq_set_backend(vq, vs_tpg);
vhost_vq_init_access(vq);
mutex_unlock(&vq->mutex);
}
@@ -1547,7 +1547,7 @@ vhost_scsi_clear_endpoint(struct vhost_scsi *vs,
for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
vq = &vs->vqs[i].vq;
mutex_lock(&vq->mutex);
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
}
}
@@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
- VHOST_SCSI_WEIGHT, 0, NULL);
+ VHOST_SCSI_WEIGHT, 0, true, NULL);
vhost_scsi_init_inflight(vs, NULL);
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index e37c92d4d7ad..0466921f4772 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n)
void *private;
mutex_lock(&vq->mutex);
- private = vq->private_data;
+ private = vhost_vq_get_backend(vq);
if (!private) {
mutex_unlock(&vq->mutex);
return;
@@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f)
vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ];
n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick;
vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV,
- VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT);
+ VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL);
f->private_data = n;
@@ -133,8 +133,8 @@ static void *vhost_test_stop_vq(struct vhost_test *n,
void *private;
mutex_lock(&vq->mutex);
- private = vq->private_data;
- vq->private_data = NULL;
+ private = vhost_vq_get_backend(vq);
+ vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
return private;
}
@@ -198,8 +198,8 @@ static long vhost_test_run(struct vhost_test *n, int test)
priv = test ? n : NULL;
/* start polling new socket */
- oldpriv = vq->private_data;
- vq->private_data = priv;
+ oldpriv = vhost_vq_get_backend(vq);
+ vhost_vq_set_backend(vq, priv);
r = vhost_vq_init_access(&n->vqs[index]);
@@ -225,7 +225,7 @@ static long vhost_test_reset_owner(struct vhost_test *n)
{
void *priv = NULL;
long err;
- struct vhost_umem *umem;
+ struct vhost_iotlb *umem;
mutex_lock(&n->dev.mutex);
err = vhost_dev_check_owner(&n->dev);
diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 421f02a8530a..7580e34f76c1 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -15,12 +15,14 @@
#include <linux/module.h>
#include <linux/cdev.h>
#include <linux/device.h>
+#include <linux/mm.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
#include <linux/virtio_net.h>
+#include <linux/kernel.h>
#include "vhost.h"
@@ -70,6 +72,7 @@ struct vhost_vdpa {
int nvqs;
int virtio_id;
int minor;
+ struct eventfd_ctx *config_ctx;
};
static DEFINE_IDA(vhost_vdpa_ida);
@@ -101,6 +104,17 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private)
return IRQ_HANDLED;
}
+static irqreturn_t vhost_vdpa_config_cb(void *private)
+{
+ struct vhost_vdpa *v = private;
+ struct eventfd_ctx *config_ctx = v->config_ctx;
+
+ if (config_ctx)
+ eventfd_signal(config_ctx, 1);
+
+ return IRQ_HANDLED;
+}
+
static void vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
@@ -288,6 +302,36 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
return 0;
}
+static void vhost_vdpa_config_put(struct vhost_vdpa *v)
+{
+ if (v->config_ctx)
+ eventfd_ctx_put(v->config_ctx);
+}
+
+static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
+{
+ struct vdpa_callback cb;
+ int fd;
+ struct eventfd_ctx *ctx;
+
+ cb.callback = vhost_vdpa_config_cb;
+ cb.private = v->vdpa;
+ if (copy_from_user(&fd, argp, sizeof(fd)))
+ return -EFAULT;
+
+ ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
+ swap(ctx, v->config_ctx);
+
+ if (!IS_ERR_OR_NULL(ctx))
+ eventfd_ctx_put(ctx);
+
+ if (IS_ERR(v->config_ctx))
+ return PTR_ERR(v->config_ctx);
+
+ v->vdpa->config->set_config_cb(v->vdpa, &cb);
+
+ return 0;
+}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp)
{
@@ -296,7 +340,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
struct vdpa_callback cb;
struct vhost_virtqueue *vq;
struct vhost_vring_state s;
- u8 status;
u32 idx;
long r;
@@ -310,8 +353,6 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
idx = array_index_nospec(idx, v->nvqs);
vq = &v->vqs[idx];
- status = ops->get_status(vdpa);
-
if (cmd == VHOST_VDPA_SET_VRING_ENABLE) {
if (copy_from_user(&s, argp, sizeof(s)))
return -EFAULT;
@@ -398,6 +439,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
case VHOST_SET_LOG_FD:
r = -ENOIOCTLCMD;
break;
+ case VHOST_VDPA_SET_CONFIG_CALL:
+ r = vhost_vdpa_set_config_call(v, argp);
+ break;
default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp);
if (r == -ENOIOCTLCMD)
@@ -530,7 +574,7 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (!npages)
return -EINVAL;
- down_read(&dev->mm->mmap_sem);
+ mmap_read_lock(dev->mm);
locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -583,7 +627,7 @@ out:
vhost_vdpa_unmap(v, msg->iova, msg->size);
atomic64_sub(npages, &dev->mm->pinned_vm);
}
- up_read(&dev->mm->mmap_sem);
+ mmap_read_unlock(dev->mm);
free_page((unsigned long)page_list);
return ret;
}
@@ -678,8 +722,6 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
int nvqs, i, r, opened;
v = container_of(inode->i_cdev, struct vhost_vdpa, cdev);
- if (!v)
- return -ENODEV;
opened = atomic_cmpxchg(&v->opened, 0, 1);
if (opened)
@@ -699,7 +741,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
vqs[i] = &v->vqs[i];
vqs[i]->handle_kick = handle_vq_kick;
}
- vhost_dev_init(dev, vqs, nvqs, 0, 0, 0,
+ vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
vhost_vdpa_process_iotlb_msg);
dev->iotlb = vhost_iotlb_alloc(0, 0);
@@ -734,6 +776,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
vhost_dev_stop(&v->vdev);
vhost_vdpa_iotlb_free(v);
vhost_vdpa_free_domain(v);
+ vhost_vdpa_config_put(v);
vhost_dev_cleanup(&v->vdev);
kfree(v->vdev.vqs);
mutex_unlock(&d->mutex);
@@ -744,12 +787,74 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
return 0;
}
+#ifdef CONFIG_MMU
+static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf)
+{
+ struct vhost_vdpa *v = vmf->vma->vm_file->private_data;
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ struct vdpa_notification_area notify;
+ struct vm_area_struct *vma = vmf->vma;
+ u16 index = vma->vm_pgoff;
+
+ notify = ops->get_vq_notification(vdpa, index);
+
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
+ notify.addr >> PAGE_SHIFT, PAGE_SIZE,
+ vma->vm_page_prot))
+ return VM_FAULT_SIGBUS;
+
+ return VM_FAULT_NOPAGE;
+}
+
+static const struct vm_operations_struct vhost_vdpa_vm_ops = {
+ .fault = vhost_vdpa_fault,
+};
+
+static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct vhost_vdpa *v = vma->vm_file->private_data;
+ struct vdpa_device *vdpa = v->vdpa;
+ const struct vdpa_config_ops *ops = vdpa->config;
+ struct vdpa_notification_area notify;
+ int index = vma->vm_pgoff;
+
+ if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+ return -EINVAL;
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+ if (vma->vm_flags & VM_READ)
+ return -EINVAL;
+ if (index > 65535)
+ return -EINVAL;
+ if (!ops->get_vq_notification)
+ return -ENOTSUPP;
+
+ /* To be safe and easily modelled by userspace, We only
+ * support the doorbell which sits on the page boundary and
+ * does not share the page with other registers.
+ */
+ notify = ops->get_vq_notification(vdpa, index);
+ if (notify.addr & (PAGE_SIZE - 1))
+ return -EINVAL;
+ if (vma->vm_end - vma->vm_start != notify.size)
+ return -ENOTSUPP;
+
+ vma->vm_ops = &vhost_vdpa_vm_ops;
+ return 0;
+}
+#endif /* CONFIG_MMU */
+
static const struct file_operations vhost_vdpa_fops = {
.owner = THIS_MODULE,
.open = vhost_vdpa_open,
.release = vhost_vdpa_release,
.write_iter = vhost_vdpa_chr_write_iter,
.unlocked_ioctl = vhost_vdpa_unlocked_ioctl,
+#ifdef CONFIG_MMU
+ .mmap = vhost_vdpa_mmap,
+#endif /* CONFIG_MMU */
.compat_ioctl = compat_ptr_ioctl,
};
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index d450e16c5c25..d7b8df3edffc 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -14,7 +14,6 @@
#include <linux/vhost.h>
#include <linux/uio.h>
#include <linux/mm.h>
-#include <linux/mmu_context.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
#include <linux/poll.h>
@@ -166,11 +165,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync,
void *key)
{
struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
+ struct vhost_work *work = &poll->work;
if (!(key_to_poll(key) & poll->mask))
return 0;
- vhost_poll_queue(poll);
+ if (!poll->dev->use_worker)
+ work->fn(work);
+ else
+ vhost_poll_queue(poll);
+
return 0;
}
@@ -330,10 +334,8 @@ static int vhost_worker(void *data)
struct vhost_dev *dev = data;
struct vhost_work *work, *work_next;
struct llist_node *node;
- mm_segment_t oldfs = get_fs();
- set_fs(USER_DS);
- use_mm(dev->mm);
+ kthread_use_mm(dev->mm);
for (;;) {
/* mb paired w/ kthread_stop */
@@ -361,8 +363,7 @@ static int vhost_worker(void *data)
schedule();
}
}
- unuse_mm(dev->mm);
- set_fs(oldfs);
+ kthread_unuse_mm(dev->mm);
return 0;
}
@@ -454,6 +455,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq,
void vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue **vqs, int nvqs,
int iov_limit, int weight, int byte_weight,
+ bool use_worker,
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg))
{
@@ -471,6 +473,7 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
+ dev->use_worker = use_worker;
dev->msg_handler = msg_handler;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
@@ -534,6 +537,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev)
}
EXPORT_SYMBOL_GPL(vhost_dev_has_owner);
+static void vhost_attach_mm(struct vhost_dev *dev)
+{
+ /* No owner, become one */
+ if (dev->use_worker) {
+ dev->mm = get_task_mm(current);
+ } else {
+ /* vDPA device does not use worker thead, so there's
+ * no need to hold the address space for mm. This help
+ * to avoid deadlock in the case of mmap() which may
+ * held the refcnt of the file and depends on release
+ * method to remove vma.
+ */
+ dev->mm = current->mm;
+ mmgrab(dev->mm);
+ }
+}
+
+static void vhost_detach_mm(struct vhost_dev *dev)
+{
+ if (!dev->mm)
+ return;
+
+ if (dev->use_worker)
+ mmput(dev->mm);
+ else
+ mmdrop(dev->mm);
+
+ dev->mm = NULL;
+}
+
/* Caller should have device mutex */
long vhost_dev_set_owner(struct vhost_dev *dev)
{
@@ -546,21 +579,24 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
goto err_mm;
}
- /* No owner, become one */
- dev->mm = get_task_mm(current);
+ vhost_attach_mm(dev);
+
dev->kcov_handle = kcov_common_handle();
- worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
- if (IS_ERR(worker)) {
- err = PTR_ERR(worker);
- goto err_worker;
- }
+ if (dev->use_worker) {
+ worker = kthread_create(vhost_worker, dev,
+ "vhost-%d", current->pid);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
+ goto err_worker;
+ }
- dev->worker = worker;
- wake_up_process(worker); /* avoid contributing to loadavg */
+ dev->worker = worker;
+ wake_up_process(worker); /* avoid contributing to loadavg */
- err = vhost_attach_cgroups(dev);
- if (err)
- goto err_cgroup;
+ err = vhost_attach_cgroups(dev);
+ if (err)
+ goto err_cgroup;
+ }
err = vhost_dev_alloc_iovecs(dev);
if (err)
@@ -568,12 +604,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
return 0;
err_cgroup:
- kthread_stop(worker);
- dev->worker = NULL;
+ if (dev->worker) {
+ kthread_stop(dev->worker);
+ dev->worker = NULL;
+ }
err_worker:
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
+ vhost_detach_mm(dev);
dev->kcov_handle = 0;
err_mm:
return err;
@@ -670,9 +706,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
dev->worker = NULL;
dev->kcov_handle = 0;
}
- if (dev->mm)
- mmput(dev->mm);
- dev->mm = NULL;
+ vhost_detach_mm(dev);
}
EXPORT_SYMBOL_GPL(vhost_dev_cleanup);
@@ -730,7 +764,7 @@ static inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq,
if (!map)
return NULL;
- return (void *)(uintptr_t)(map->addr + addr - map->start);
+ return (void __user *)(uintptr_t)(map->addr + addr - map->start);
}
/* Can we switch to this memory table? */
@@ -869,7 +903,7 @@ static void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq,
* not happen in this case.
*/
static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
- void *addr, unsigned int size,
+ void __user *addr, unsigned int size,
int type)
{
void __user *uaddr = vhost_vq_meta_fetch(vq,
@@ -882,7 +916,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq,
#define vhost_put_user(vq, x, ptr) \
({ \
- int ret = -EFAULT; \
+ int ret; \
if (!vq->iotlb) { \
ret = __put_user(x, ptr); \
} else { \
@@ -1244,9 +1278,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access)
}
static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+ vring_desc_t __user *desc,
+ vring_avail_t __user *avail,
+ vring_used_t __user *used)
{
return access_ok(desc, vhost_get_desc_size(vq, num)) &&
@@ -1574,7 +1608,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
+ eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd);
if (IS_ERR(eventfp)) {
r = PTR_ERR(eventfp);
break;
@@ -1590,7 +1624,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd);
+ ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -1602,7 +1636,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
r = -EFAULT;
break;
}
- ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd);
+ ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -1727,7 +1761,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
r = get_user(fd, (int __user *)argp);
if (r < 0)
break;
- ctx = fd == -1 ? NULL : eventfd_ctx_fdget(fd);
+ ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd);
if (IS_ERR(ctx)) {
r = PTR_ERR(ctx);
break;
@@ -1762,15 +1796,14 @@ static int set_bit_to_user(int nr, void __user *addr)
int bit = nr + (log % PAGE_SIZE) * 8;
int r;
- r = get_user_pages_fast(log, 1, FOLL_WRITE, &page);
+ r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page);
if (r < 0)
return r;
BUG_ON(r != 1);
base = kmap_atomic(page);
set_bit(bit, base);
kunmap_atomic(base);
- set_page_dirty_lock(page);
- put_page(page);
+ unpin_user_pages_dirty_lock(&page, 1, true);
return 0;
}
@@ -2301,7 +2334,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
struct vring_used_elem *heads,
unsigned count)
{
- struct vring_used_elem __user *used;
+ vring_used_elem_t __user *used;
u16 old, new;
int start;
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 181382185bbc..c8e96a095d3b 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -67,9 +67,9 @@ struct vhost_virtqueue {
/* The actual ring of buffers. */
struct mutex mutex;
unsigned int num;
- struct vring_desc __user *desc;
- struct vring_avail __user *avail;
- struct vring_used __user *used;
+ vring_desc_t __user *desc;
+ vring_avail_t __user *avail;
+ vring_used_t __user *used;
const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick;
struct eventfd_ctx *call_ctx;
@@ -154,6 +154,7 @@ struct vhost_dev {
int weight;
int byte_weight;
u64 kcov_handle;
+ bool use_worker;
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg);
};
@@ -161,6 +162,7 @@ struct vhost_dev {
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
int nvqs, int iov_limit, int weight, int byte_weight,
+ bool use_worker,
int (*msg_handler)(struct vhost_dev *dev,
struct vhost_iotlb_msg *msg));
long vhost_dev_set_owner(struct vhost_dev *dev);
@@ -231,6 +233,33 @@ enum {
(1ULL << VIRTIO_F_VERSION_1)
};
+/**
+ * vhost_vq_set_backend - Set backend.
+ *
+ * @vq Virtqueue.
+ * @private_data The private data.
+ *
+ * Context: Need to call with vq->mutex acquired.
+ */
+static inline void vhost_vq_set_backend(struct vhost_virtqueue *vq,
+ void *private_data)
+{
+ vq->private_data = private_data;
+}
+
+/**
+ * vhost_vq_get_backend - Get backend.
+ *
+ * @vq Virtqueue.
+ *
+ * Context: Need to call with vq->mutex acquired.
+ * Return: Private data previously set with vhost_vq_set_backend.
+ */
+static inline void *vhost_vq_get_backend(struct vhost_virtqueue *vq)
+{
+ return vq->private_data;
+}
+
static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
{
return vq->acked_features & (1ULL << bit);
diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index ee0491f579ac..e059a9a47cdf 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c
@@ -13,9 +13,11 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/export.h>
+#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
#include <linux/bvec.h>
#include <linux/highmem.h>
#include <linux/vhost_iotlb.h>
+#endif
#include <uapi/linux/virtio_config.h>
static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
@@ -618,9 +620,9 @@ static inline int xfer_to_user(const struct vringh *vrh,
*/
int vringh_init_user(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,
- struct vring_desc __user *desc,
- struct vring_avail __user *avail,
- struct vring_used __user *used)
+ vring_desc_t __user *desc,
+ vring_avail_t __user *avail,
+ vring_used_t __user *used)
{
/* Sane power of 2 please! */
if (!num || num > 0xffff || (num & (num - 1))) {
@@ -1059,6 +1061,8 @@ int vringh_need_notify_kern(struct vringh *vrh)
}
EXPORT_SYMBOL(vringh_need_notify_kern);
+#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
+
static int iotlb_translate(const struct vringh *vrh,
u64 addr, u64 len, struct bio_vec iov[],
int iov_size, u32 perm)
@@ -1416,5 +1420,6 @@ int vringh_need_notify_iotlb(struct vringh *vrh)
}
EXPORT_SYMBOL(vringh_need_notify_iotlb);
+#endif
MODULE_LICENSE("GPL");
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 97669484a3f6..a483cec31d5c 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -91,7 +91,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
mutex_lock(&vq->mutex);
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
goto out;
/* Avoid further vmexits, we're already processing the virtqueue */
@@ -181,14 +181,14 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
break;
}
- vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
- added = true;
-
- /* Deliver to monitoring devices all correctly transmitted
- * packets.
+ /* Deliver to monitoring devices all packets that we
+ * will transmit.
*/
virtio_transport_deliver_tap_pkt(pkt);
+ vhost_add_used(vq, head, sizeof(pkt->hdr) + payload_len);
+ added = true;
+
pkt->off += payload_len;
total_len += payload_len;
@@ -196,6 +196,12 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
* to send it with the next available buffer.
*/
if (pkt->off < pkt->len) {
+ /* We are queueing the same virtio_vsock_pkt to handle
+ * the remaining bytes, and we want to deliver it
+ * to monitoring devices in the next iteration.
+ */
+ pkt->tap_delivered = false;
+
spin_lock_bh(&vsock->send_pkt_list_lock);
list_add(&pkt->list, &vsock->send_pkt_list);
spin_unlock_bh(&vsock->send_pkt_list_lock);
@@ -440,7 +446,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
mutex_lock(&vq->mutex);
- if (!vq->private_data)
+ if (!vhost_vq_get_backend(vq))
goto out;
vhost_disable_notify(&vsock->dev, vq);
@@ -533,8 +539,8 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
goto err_vq;
}
- if (!vq->private_data) {
- vq->private_data = vsock;
+ if (!vhost_vq_get_backend(vq)) {
+ vhost_vq_set_backend(vq, vsock);
ret = vhost_vq_init_access(vq);
if (ret)
goto err_vq;
@@ -543,18 +549,23 @@ static int vhost_vsock_start(struct vhost_vsock *vsock)
mutex_unlock(&vq->mutex);
}
+ /* Some packets may have been queued before the device was started,
+ * let's kick the send worker to send them.
+ */
+ vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
mutex_unlock(&vsock->dev.mutex);
return 0;
err_vq:
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
}
err:
@@ -577,7 +588,7 @@ static int vhost_vsock_stop(struct vhost_vsock *vsock)
struct vhost_virtqueue *vq = &vsock->vqs[i];
mutex_lock(&vq->mutex);
- vq->private_data = NULL;
+ vhost_vq_set_backend(vq, NULL);
mutex_unlock(&vq->mutex);
}
@@ -621,7 +632,7 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
- VHOST_VSOCK_WEIGHT, NULL);
+ VHOST_VSOCK_WEIGHT, true, NULL);
file->private_data = vsock;
spin_lock_init(&vsock->send_pkt_list_lock);