diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-10 13:42:09 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-10 13:42:09 -0700 |
commit | 09102704c67457c6cdea6c0394c34843484a852c (patch) | |
tree | c4faeb56ff6c94d50b56da17969b88b27c19eae5 /drivers | |
parent | 84fc461db99b2dc19e019c0a97725a3653687981 (diff) | |
parent | 044e4b09223039e571e6ec540e25552054208765 (diff) |
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull virtio updates from Michael Tsirkin:
- virtio-mem: paravirtualized memory hotplug
- support doorbell mapping for vdpa
- config interrupt support in ifc
- fixes all over the place
* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost: (40 commits)
vhost/test: fix up after API change
virtio_mem: convert device block size into 64bit
virtio-mem: drop unnecessary initialization
ifcvf: implement config interrupt in IFCVF
vhost: replace -1 with VHOST_FILE_UNBIND in ioctls
vhost_vdpa: Support config interrupt in vdpa
ifcvf: ignore continuous setting same status value
virtio-mem: Don't rely on implicit compiler padding for requests
virtio-mem: Try to unplug the complete online memory block first
virtio-mem: Use -ETXTBSY as error code if the device is busy
virtio-mem: Unplug subblocks right-to-left
virtio-mem: Drop manual check for already present memory
virtio-mem: Add parent resource for all added "System RAM"
virtio-mem: Better retry handling
virtio-mem: Offline and remove completely unplugged memory blocks
mm/memory_hotplug: Introduce offline_and_remove_memory()
virtio-mem: Allow to offline partially unplugged memory blocks
mm: Allow to offline unmovable PageOffline() pages via MEM_GOING_OFFLINE
virtio-mem: Paravirtualized memory hotunplug part 2
virtio-mem: Paravirtualized memory hotunplug part 1
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/numa/srat.c | 1 | ||||
-rw-r--r-- | drivers/crypto/virtio/virtio_crypto_algs.c | 21 | ||||
-rw-r--r-- | drivers/misc/mic/Kconfig | 2 | ||||
-rw-r--r-- | drivers/net/caif/Kconfig | 2 | ||||
-rw-r--r-- | drivers/vdpa/Kconfig | 2 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.c | 3 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_base.h | 4 | ||||
-rw-r--r-- | drivers/vdpa/ifcvf/ifcvf_main.c | 146 | ||||
-rw-r--r-- | drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 | ||||
-rw-r--r-- | drivers/vhost/Kconfig | 17 | ||||
-rw-r--r-- | drivers/vhost/net.c | 2 | ||||
-rw-r--r-- | drivers/vhost/scsi.c | 2 | ||||
-rw-r--r-- | drivers/vhost/test.c | 2 | ||||
-rw-r--r-- | drivers/vhost/vdpa.c | 112 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 98 | ||||
-rw-r--r-- | drivers/vhost/vhost.h | 8 | ||||
-rw-r--r-- | drivers/vhost/vringh.c | 6 | ||||
-rw-r--r-- | drivers/vhost/vsock.c | 2 | ||||
-rw-r--r-- | drivers/virtio/Kconfig | 17 | ||||
-rw-r--r-- | drivers/virtio/Makefile | 1 | ||||
-rw-r--r-- | drivers/virtio/virtio_balloon.c | 9 | ||||
-rw-r--r-- | drivers/virtio/virtio_mem.c | 1965 | ||||
-rw-r--r-- | drivers/virtio/virtio_mmio.c | 4 | ||||
-rw-r--r-- | drivers/virtio/virtio_pci_modern.c | 1 |
24 files changed, 2315 insertions, 119 deletions
diff --git a/drivers/acpi/numa/srat.c b/drivers/acpi/numa/srat.c index 47b4969d9b93..5be5a977da1b 100644 --- a/drivers/acpi/numa/srat.c +++ b/drivers/acpi/numa/srat.c @@ -35,6 +35,7 @@ int pxm_to_node(int pxm) return NUMA_NO_NODE; return pxm_to_node_map[pxm]; } +EXPORT_SYMBOL(pxm_to_node); int node_to_pxm(int node) { diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c index fd045e64972a..cb8a6ea2a4bc 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_algs.c @@ -350,13 +350,18 @@ __virtio_crypto_skcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req, int err; unsigned long flags; struct scatterlist outhdr, iv_sg, status_sg, **sgs; - int i; u64 dst_len; unsigned int num_out = 0, num_in = 0; int sg_total; uint8_t *iv; + struct scatterlist *sg; src_nents = sg_nents_for_len(req->src, req->cryptlen); + if (src_nents < 0) { + pr_err("Invalid number of src SG.\n"); + return src_nents; + } + dst_nents = sg_nents(req->dst); pr_debug("virtio_crypto: Number of sgs (src_nents: %d, dst_nents: %d)\n", @@ -402,6 +407,7 @@ __virtio_crypto_skcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req, goto free; } + dst_len = min_t(unsigned int, req->cryptlen, dst_len); pr_debug("virtio_crypto: src_len: %u, dst_len: %llu\n", req->cryptlen, dst_len); @@ -442,12 +448,12 @@ __virtio_crypto_skcipher_do_req(struct virtio_crypto_sym_request *vc_sym_req, vc_sym_req->iv = iv; /* Source data */ - for (i = 0; i < src_nents; i++) - sgs[num_out++] = &req->src[i]; + for (sg = req->src; src_nents; sg = sg_next(sg), src_nents--) + sgs[num_out++] = sg; /* Destination data */ - for (i = 0; i < dst_nents; i++) - sgs[num_out + num_in++] = &req->dst[i]; + for (sg = req->dst; sg; sg = sg_next(sg)) + sgs[num_out + num_in++] = sg; /* Status */ sg_init_one(&status_sg, &vc_req->status, sizeof(vc_req->status)); @@ -577,10 +583,11 @@ static void virtio_crypto_skcipher_finalize_req( scatterwalk_map_and_copy(req->iv, req->dst, req->cryptlen - AES_BLOCK_SIZE, AES_BLOCK_SIZE, 0); - crypto_finalize_skcipher_request(vc_sym_req->base.dataq->engine, - req, err); kzfree(vc_sym_req->iv); virtcrypto_clear_request(&vc_sym_req->base); + + crypto_finalize_skcipher_request(vc_sym_req->base.dataq->engine, + req, err); } static struct virtio_crypto_algo virtio_crypto_algs[] = { { diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index 3bfe72c59864..8f201d019f5a 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -116,7 +116,7 @@ config MIC_COSM config VOP tristate "VOP Driver" - depends on VOP_BUS && VHOST_DPN + depends on VOP_BUS select VHOST_RING select VIRTIO help diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig index 1538ad194cf4..1f2f2e8209c3 100644 --- a/drivers/net/caif/Kconfig +++ b/drivers/net/caif/Kconfig @@ -50,7 +50,7 @@ config CAIF_HSI config CAIF_VIRTIO tristate "CAIF virtio transport driver" - depends on CAIF && HAS_DMA && VHOST_DPN + depends on CAIF && HAS_DMA select VHOST_RING select VIRTIO select GENERIC_ALLOCATOR diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index e8140065c8a5..3e1ceb8e9f2b 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -10,7 +10,7 @@ if VDPA config VDPA_SIM tristate "vDPA device simulator" - depends on RUNTIME_TESTING_MENU && HAS_DMA && VHOST_DPN + depends on RUNTIME_TESTING_MENU && HAS_DMA select VHOST_RING default n help diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index e24371d644b5..94bf0328b68d 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -185,6 +185,9 @@ void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) void ifcvf_reset(struct ifcvf_hw *hw) { + hw->config_cb.callback = NULL; + hw->config_cb.private = NULL; + ifcvf_set_status(hw, 0); /* flush set_status, make sure VF is stopped, reset */ ifcvf_get_status(hw); diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index e80307092351..f4554412e607 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -27,6 +27,7 @@ ((1ULL << VIRTIO_NET_F_MAC) | \ (1ULL << VIRTIO_F_ANY_LAYOUT) | \ (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_NET_F_STATUS) | \ (1ULL << VIRTIO_F_ORDER_PLATFORM) | \ (1ULL << VIRTIO_F_IOMMU_PLATFORM) | \ (1ULL << VIRTIO_NET_F_MRG_RXBUF)) @@ -81,6 +82,9 @@ struct ifcvf_hw { void __iomem *net_cfg; struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2]; void __iomem * const *base; + char config_msix_name[256]; + struct vdpa_callback config_cb; + }; struct ifcvf_adapter { diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index abf6a061cab6..f5a60c14b979 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -18,6 +18,16 @@ #define DRIVER_AUTHOR "Intel Corporation" #define IFCVF_DRIVER_NAME "ifcvf" +static irqreturn_t ifcvf_config_changed(int irq, void *arg) +{ + struct ifcvf_hw *vf = arg; + + if (vf->config_cb.callback) + return vf->config_cb.callback(vf->config_cb.private); + + return IRQ_HANDLED; +} + static irqreturn_t ifcvf_intr_handler(int irq, void *arg) { struct vring_info *vring = arg; @@ -28,6 +38,68 @@ static irqreturn_t ifcvf_intr_handler(int irq, void *arg) return IRQ_HANDLED; } +static void ifcvf_free_irq_vectors(void *data) +{ + pci_free_irq_vectors(data); +} + +static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int i; + + + for (i = 0; i < queues; i++) + devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + + ifcvf_free_irq_vectors(pdev); +} + +static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int vector, i, ret, irq; + + ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, + IFCVF_MAX_INTR, PCI_IRQ_MSIX); + if (ret < 0) { + IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n"); + return ret; + } + + snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", + pci_name(pdev)); + vector = 0; + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_config_changed, 0, + vf->config_msix_name, vf); + + for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { + snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", + pci_name(pdev), i); + vector = i + IFCVF_MSI_QUEUE_OFF; + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_intr_handler, 0, + vf->vring[i].msix_name, + &vf->vring[i]); + if (ret) { + IFCVF_ERR(pdev, + "Failed to request irq for vq %d\n", i); + ifcvf_free_irq(adapter, i); + + return ret; + } + + vf->vring[i].irq = irq; + } + + return 0; +} + static int ifcvf_start_datapath(void *private) { struct ifcvf_hw *vf = ifcvf_private_to_vf(private); @@ -118,17 +190,37 @@ static void ifcvf_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status) { struct ifcvf_adapter *adapter; struct ifcvf_hw *vf; + u8 status_old; + int ret; vf = vdpa_to_vf(vdpa_dev); adapter = dev_get_drvdata(vdpa_dev->dev.parent); + status_old = ifcvf_get_status(vf); - if (status == 0) { + if (status_old == status) + return; + + if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) && + !(status & VIRTIO_CONFIG_S_DRIVER_OK)) { ifcvf_stop_datapath(adapter); + ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2); + } + + if (status == 0) { ifcvf_reset_vring(adapter); return; } - if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && + !(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) { + ret = ifcvf_request_irq(adapter); + if (ret) { + status = ifcvf_get_status(vf); + status |= VIRTIO_CONFIG_S_FAILED; + ifcvf_set_status(vf, status); + return; + } + if (ifcvf_start_datapath(adapter) < 0) IFCVF_ERR(adapter->pdev, "Failed to set ifcvf vdpa status %u\n", @@ -254,7 +346,10 @@ static void ifcvf_vdpa_set_config(struct vdpa_device *vdpa_dev, static void ifcvf_vdpa_set_config_cb(struct vdpa_device *vdpa_dev, struct vdpa_callback *cb) { - /* We don't support config interrupt */ + struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); + + vf->config_cb.callback = cb->callback; + vf->config_cb.private = cb->private; } /* @@ -284,38 +379,6 @@ static const struct vdpa_config_ops ifc_vdpa_ops = { .set_config_cb = ifcvf_vdpa_set_config_cb, }; -static int ifcvf_request_irq(struct ifcvf_adapter *adapter) -{ - struct pci_dev *pdev = adapter->pdev; - struct ifcvf_hw *vf = &adapter->vf; - int vector, i, ret, irq; - - - for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) { - snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", - pci_name(pdev), i); - vector = i + IFCVF_MSI_QUEUE_OFF; - irq = pci_irq_vector(pdev, vector); - ret = devm_request_irq(&pdev->dev, irq, - ifcvf_intr_handler, 0, - vf->vring[i].msix_name, - &vf->vring[i]); - if (ret) { - IFCVF_ERR(pdev, - "Failed to request irq for vq %d\n", i); - return ret; - } - vf->vring[i].irq = irq; - } - - return 0; -} - -static void ifcvf_free_irq_vectors(void *data) -{ - pci_free_irq_vectors(data); -} - static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct device *dev = &pdev->dev; @@ -349,13 +412,6 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } - ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR, - IFCVF_MAX_INTR, PCI_IRQ_MSIX); - if (ret < 0) { - IFCVF_ERR(pdev, "Failed to alloc irq vectors\n"); - return ret; - } - ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev); if (ret) { IFCVF_ERR(pdev, @@ -379,12 +435,6 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) adapter->pdev = pdev; adapter->vdpa.dma_dev = &pdev->dev; - ret = ifcvf_request_irq(adapter); - if (ret) { - IFCVF_ERR(pdev, "Failed to request MSI-X irq\n"); - goto err; - } - ret = ifcvf_init_hw(vf, pdev); if (ret) { IFCVF_ERR(pdev, "Failed to init IFCVF hw\n"); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 01c456f7c1f7..c7334cc65bb2 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -101,7 +101,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) { - vq->ready = 0; + vq->ready = false; vq->desc_addr = 0; vq->driver_addr = 0; vq->device_addr = 0; @@ -131,9 +131,10 @@ static void vdpasim_work(struct work_struct *work) vdpasim, work); struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; - size_t read, write, total_write; - int err; + ssize_t read, write; + size_t total_write; int pkts = 0; + int err; spin_lock(&vdpasim->lock); diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig index c4f273793595..2c75d164b827 100644 --- a/drivers/vhost/Kconfig +++ b/drivers/vhost/Kconfig @@ -13,15 +13,6 @@ config VHOST_RING This option is selected by any driver which needs to access the host side of a virtio ring. -config VHOST_DPN - bool - depends on !ARM || AEABI - default y - help - Anything selecting VHOST or VHOST_RING must depend on VHOST_DPN. - This excludes the deprecated ARM ABI since that forces a 4 byte - alignment on all structs - incompatible with virtio spec requirements. - config VHOST tristate select VHOST_IOTLB @@ -37,7 +28,7 @@ if VHOST_MENU config VHOST_NET tristate "Host kernel accelerator for virtio net" - depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) && VHOST_DPN + depends on NET && EVENTFD && (TUN || !TUN) && (TAP || !TAP) select VHOST ---help--- This kernel module can be loaded in host kernel to accelerate @@ -49,7 +40,7 @@ config VHOST_NET config VHOST_SCSI tristate "VHOST_SCSI TCM fabric driver" - depends on TARGET_CORE && EVENTFD && VHOST_DPN + depends on TARGET_CORE && EVENTFD select VHOST default n ---help--- @@ -58,7 +49,7 @@ config VHOST_SCSI config VHOST_VSOCK tristate "vhost virtio-vsock driver" - depends on VSOCKETS && EVENTFD && VHOST_DPN + depends on VSOCKETS && EVENTFD select VHOST select VIRTIO_VSOCKETS_COMMON default n @@ -72,7 +63,7 @@ config VHOST_VSOCK config VHOST_VDPA tristate "Vhost driver for vDPA-based backend" - depends on EVENTFD && VHOST_DPN + depends on EVENTFD select VHOST depends on VDPA help diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 516519dcc8ff..e992decfec53 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1327,7 +1327,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) } vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, UIO_MAXIOV + VHOST_NET_BATCH, - VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, + VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT, true, NULL); vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 8b104f76f324..6fb4d7ecfa19 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1628,7 +1628,7 @@ static int vhost_scsi_open(struct inode *inode, struct file *f) vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; } vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV, - VHOST_SCSI_WEIGHT, 0, NULL); + VHOST_SCSI_WEIGHT, 0, true, NULL); vhost_scsi_init_inflight(vs, NULL); diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 9a3a09005e03..0466921f4772 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -120,7 +120,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, - VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, NULL); + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT, true, NULL); f->private_data = n; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 40e2a5747c98..7580e34f76c1 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -15,12 +15,14 @@ #include <linux/module.h> #include <linux/cdev.h> #include <linux/device.h> +#include <linux/mm.h> #include <linux/iommu.h> #include <linux/uuid.h> #include <linux/vdpa.h> #include <linux/nospec.h> #include <linux/vhost.h> #include <linux/virtio_net.h> +#include <linux/kernel.h> #include "vhost.h" @@ -70,6 +72,7 @@ struct vhost_vdpa { int nvqs; int virtio_id; int minor; + struct eventfd_ctx *config_ctx; }; static DEFINE_IDA(vhost_vdpa_ida); @@ -101,6 +104,17 @@ static irqreturn_t vhost_vdpa_virtqueue_cb(void *private) return IRQ_HANDLED; } +static irqreturn_t vhost_vdpa_config_cb(void *private) +{ + struct vhost_vdpa *v = private; + struct eventfd_ctx *config_ctx = v->config_ctx; + + if (config_ctx) + eventfd_signal(config_ctx, 1); + + return IRQ_HANDLED; +} + static void vhost_vdpa_reset(struct vhost_vdpa *v) { struct vdpa_device *vdpa = v->vdpa; @@ -288,6 +302,36 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) return 0; } +static void vhost_vdpa_config_put(struct vhost_vdpa *v) +{ + if (v->config_ctx) + eventfd_ctx_put(v->config_ctx); +} + +static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_callback cb; + int fd; + struct eventfd_ctx *ctx; + + cb.callback = vhost_vdpa_config_cb; + cb.private = v->vdpa; + if (copy_from_user(&fd, argp, sizeof(fd))) + return -EFAULT; + + ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); + swap(ctx, v->config_ctx); + + if (!IS_ERR_OR_NULL(ctx)) + eventfd_ctx_put(ctx); + + if (IS_ERR(v->config_ctx)) + return PTR_ERR(v->config_ctx); + + v->vdpa->config->set_config_cb(v->vdpa, &cb); + + return 0; +} static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -395,6 +439,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_SET_LOG_FD: r = -ENOIOCTLCMD; break; + case VHOST_VDPA_SET_CONFIG_CALL: + r = vhost_vdpa_set_config_call(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) @@ -694,7 +741,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) vqs[i] = &v->vqs[i]; vqs[i]->handle_kick = handle_vq_kick; } - vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, + vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false, vhost_vdpa_process_iotlb_msg); dev->iotlb = vhost_iotlb_alloc(0, 0); @@ -729,6 +776,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_dev_stop(&v->vdev); vhost_vdpa_iotlb_free(v); vhost_vdpa_free_domain(v); + vhost_vdpa_config_put(v); vhost_dev_cleanup(&v->vdev); kfree(v->vdev.vqs); mutex_unlock(&d->mutex); @@ -739,12 +787,74 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) return 0; } +#ifdef CONFIG_MMU +static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) +{ + struct vhost_vdpa *v = vmf->vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + struct vm_area_struct *vma = vmf->vma; + u16 index = vma->vm_pgoff; + + notify = ops->get_vq_notification(vdpa, index); + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (remap_pfn_range(vma, vmf->address & PAGE_MASK, + notify.addr >> PAGE_SHIFT, PAGE_SIZE, + vma->vm_page_prot)) + return VM_FAULT_SIGBUS; + + return VM_FAULT_NOPAGE; +} + +static const struct vm_operations_struct vhost_vdpa_vm_ops = { + .fault = vhost_vdpa_fault, +}; + +static int vhost_vdpa_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct vhost_vdpa *v = vma->vm_file->private_data; + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + struct vdpa_notification_area notify; + int index = vma->vm_pgoff; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + if (vma->vm_flags & VM_READ) + return -EINVAL; + if (index > 65535) + return -EINVAL; + if (!ops->get_vq_notification) + return -ENOTSUPP; + + /* To be safe and easily modelled by userspace, We only + * support the doorbell which sits on the page boundary and + * does not share the page with other registers. + */ + notify = ops->get_vq_notification(vdpa, index); + if (notify.addr & (PAGE_SIZE - 1)) + return -EINVAL; + if (vma->vm_end - vma->vm_start != notify.size) + return -ENOTSUPP; + + vma->vm_ops = &vhost_vdpa_vm_ops; + return 0; +} +#endif /* CONFIG_MMU */ + static const struct file_operations vhost_vdpa_fops = { .owner = THIS_MODULE, .open = vhost_vdpa_open, .release = vhost_vdpa_release, .write_iter = vhost_vdpa_chr_write_iter, .unlocked_ioctl = vhost_vdpa_unlocked_ioctl, +#ifdef CONFIG_MMU + .mmap = vhost_vdpa_mmap, +#endif /* CONFIG_MMU */ .compat_ioctl = compat_ptr_ioctl, }; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 596132a96cd5..062595ee1f83 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -166,11 +166,16 @@ static int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); + struct vhost_work *work = &poll->work; if (!(key_to_poll(key) & poll->mask)) return 0; - vhost_poll_queue(poll); + if (!poll->dev->use_worker) + work->fn(work); + else + vhost_poll_queue(poll); + return 0; } @@ -454,6 +459,7 @@ static size_t vhost_get_desc_size(struct vhost_virtqueue *vq, void vhost_dev_init(struct vhost_dev *dev, struct vhost_virtqueue **vqs, int nvqs, int iov_limit, int weight, int byte_weight, + bool use_worker, int (*msg_handler)(struct vhost_dev *dev, struct vhost_iotlb_msg *msg)) { @@ -471,6 +477,7 @@ void vhost_dev_init(struct vhost_dev *dev, dev->iov_limit = iov_limit; dev->weight = weight; dev->byte_weight = byte_weight; + dev->use_worker = use_worker; dev->msg_handler = msg_handler; init_llist_head(&dev->work_list); init_waitqueue_head(&dev->wait); @@ -534,6 +541,36 @@ bool vhost_dev_has_owner(struct vhost_dev *dev) } EXPORT_SYMBOL_GPL(vhost_dev_has_owner); +static void vhost_attach_mm(struct vhost_dev *dev) +{ + /* No owner, become one */ + if (dev->use_worker) { + dev->mm = get_task_mm(current); + } else { + /* vDPA device does not use worker thead, so there's + * no need to hold the address space for mm. This help + * to avoid deadlock in the case of mmap() which may + * held the refcnt of the file and depends on release + * method to remove vma. + */ + dev->mm = current->mm; + mmgrab(dev->mm); + } +} + +static void vhost_detach_mm(struct vhost_dev *dev) +{ + if (!dev->mm) + return; + + if (dev->use_worker) + mmput(dev->mm); + else + mmdrop(dev->mm); + + dev->mm = NULL; +} + /* Caller should have device mutex */ long vhost_dev_set_owner(struct vhost_dev *dev) { @@ -546,21 +583,24 @@ long vhost_dev_set_owner(struct vhost_dev *dev) goto err_mm; } - /* No owner, become one */ - dev->mm = get_task_mm(current); + vhost_attach_mm(dev); + dev->kcov_handle = kcov_common_handle(); - worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid); - if (IS_ERR(worker)) { - err = PTR_ERR(worker); - goto err_worker; - } + if (dev->use_worker) { + worker = kthread_create(vhost_worker, dev, + "vhost-%d", current->pid); + if (IS_ERR(worker)) { + err = PTR_ERR(worker); + goto err_worker; + } - dev->worker = worker; - wake_up_process(worker); /* avoid contributing to loadavg */ + dev->worker = worker; + wake_up_process(worker); /* avoid contributing to loadavg */ - err = vhost_attach_cgroups(dev); - if (err) - goto err_cgroup; + err = vhost_attach_cgroups(dev); + if (err) + goto err_cgroup; + } err = vhost_dev_alloc_iovecs(dev); if (err) @@ -568,12 +608,12 @@ long vhost_dev_set_owner(struct vhost_dev *dev) return 0; err_cgroup: - kthread_stop(worker); - dev->worker = NULL; + if (dev->worker) { + kthread_stop(dev->worker); + dev->worker = NULL; + } err_worker: - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); dev->kcov_handle = 0; err_mm: return err; @@ -670,9 +710,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev) dev->worker = NULL; dev->kcov_handle = 0; } - if (dev->mm) - mmput(dev->mm); - dev->mm = NULL; + vhost_detach_mm(dev); } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -882,7 +920,7 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, #define vhost_put_user(vq, x, ptr) \ ({ \ - int ret = -EFAULT; \ + int ret; \ if (!vq->iotlb) { \ ret = __put_user(x, ptr); \ } else { \ @@ -1244,9 +1282,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { return access_ok(desc, vhost_get_desc_size(vq, num)) && @@ -1574,7 +1612,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); if (IS_ERR(eventfp)) { r = PTR_ERR(eventfp); break; @@ -1590,7 +1628,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1602,7 +1640,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg r = -EFAULT; break; } - ctx = f.fd == -1 ? NULL : eventfd_ctx_fdget(f.fd); + ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); if (IS_ERR(ctx)) { r = PTR_ERR(ctx); break; @@ -1727,7 +1765,7 @@ long vhost_de |