From 5b8e432dbb0e20bd8e99033f4a0bfa0d38c0e08e Mon Sep 17 00:00:00 2001
From: Dongli Zhang
Date: Tue, 15 Jan 2019 00:41:43 +0800
Subject: xen/blkback: add stack variable 'blkif' in connect_ring()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As 'be->blkif' is used for many times in connect_ring(), the stack
variable 'blkif' is added to substitute 'be->blkif'.

Suggested-by: Paul Durrant
Signed-off-by: Dongli Zhang
Reviewed-by: Paul Durrant
Reviewed-by: Roger Pau Monné
Signed-off-by: Konrad Rzeszutek Wilk
---
 drivers/block/xen-blkback/xenbus.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

(limited to 'drivers/block')

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index a4bc74e72c39..a4aadac772e5 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -1023,6 +1023,7 @@ fail:
 static int connect_ring(struct backend_info *be)
 {
         struct xenbus_device *dev = be->dev;
+        struct xen_blkif *blkif = be->blkif;
         unsigned int pers_grants;
         char protocol[64] = "";
         int err, i;
@@ -1033,25 +1034,25 @@ static int connect_ring(struct backend_info *be)

         pr_debug("%s %s\n", __func__, dev->otherend);

-        be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
+        blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
         err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
                            "%63s", protocol);
         if (err <= 0)
                 strcpy(protocol, "unspecified, assuming default");
         else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
-                be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
+                blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
         else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
-                be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
+                blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
         else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
-                be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
+                blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
         else {
                 xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
                 return -ENOSYS;
         }
         pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent",
                                            0);
-        be->blkif->vbd.feature_gnt_persistent = pers_grants;
-        be->blkif->vbd.overflow_max_grants = 0;
+        blkif->vbd.feature_gnt_persistent = pers_grants;
+        blkif->vbd.overflow_max_grants = 0;

         /*
          * Read the number of hardware queues from frontend.
@@ -1067,16 +1068,16 @@ static int connect_ring(struct backend_info *be)
                          requested_num_queues, xenblk_max_queues);
                 return -ENOSYS;
         }
-        be->blkif->nr_rings = requested_num_queues;
-        if (xen_blkif_alloc_rings(be->blkif))
+        blkif->nr_rings = requested_num_queues;
+        if (xen_blkif_alloc_rings(blkif))
                 return -ENOMEM;

         pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
-                 be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
+                 blkif->nr_rings, blkif->blk_protocol, protocol,
                  pers_grants ? "persistent grants" : "");

-        if (be->blkif->nr_rings == 1)
-                return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
+        if (blkif->nr_rings == 1)
+                return read_per_ring_refs(&blkif->rings[0], dev->otherend);
         else {
                 xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
                 xspath = kmalloc(xspathsize, GFP_KERNEL);
@@ -1085,10 +1086,10 @@ static int connect_ring(struct backend_info *be)
                         return -ENOMEM;
                 }

-                for (i = 0; i < be->blkif->nr_rings; i++) {
+                for (i = 0; i < blkif->nr_rings; i++) {
                         memset(xspath, 0, xspathsize);
                         snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
-                        err = read_per_ring_refs(&be->blkif->rings[i], xspath);
+                        err = read_per_ring_refs(&blkif->rings[i], xspath);
                         if (err) {
                                 kfree(xspath);
                                 return err;
--
cgit v1.2.3
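
The patch above is a purely mechanical aliasing cleanup: a pointer that connect_ring() dereferences many times is cached once in a local variable. As a stand-alone illustration of that pattern, here is a minimal user-space sketch; the struct fields and the connect_ring_sketch() name are simplified stand-ins, not the real xen-blkback definitions.

#include <stdio.h>

/* Simplified stand-ins for the real struct xen_blkif / struct backend_info. */
struct xen_blkif {
    int blk_protocol;
    unsigned int nr_rings;
};

struct backend_info {
    struct xen_blkif *blkif;
};

static int connect_ring_sketch(struct backend_info *be)
{
    /* Cache the pointer once instead of writing be->blkif at every use. */
    struct xen_blkif *blkif = be->blkif;

    blkif->blk_protocol = 1;     /* stands in for BLKIF_PROTOCOL_DEFAULT */
    blkif->nr_rings = 4;         /* stands in for the negotiated queue count */
    printf("protocol %d, %u rings\n", blkif->blk_protocol, blkif->nr_rings);
    return 0;
}

int main(void)
{
    struct xen_blkif bk = { 0 };
    struct backend_info be = { .blkif = &bk };

    return connect_ring_sketch(&be);
}
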
"persistent grants" : ""); - if (be->blkif->nr_rings == 1) - return read_per_ring_refs(&be->blkif->rings[0], dev->otherend); + if (blkif->nr_rings == 1) + return read_per_ring_refs(&blkif->rings[0], dev->otherend); else { xspathsize = strlen(dev->otherend) + xenstore_path_ext_size; xspath = kmalloc(xspathsize, GFP_KERNEL); @@ -1085,10 +1086,10 @@ static int connect_ring(struct backend_info *be) return -ENOMEM; } - for (i = 0; i < be->blkif->nr_rings; i++) { + for (i = 0; i < blkif->nr_rings; i++) { memset(xspath, 0, xspathsize); snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i); - err = read_per_ring_refs(&be->blkif->rings[i], xspath); + err = read_per_ring_refs(&blkif->rings[i], xspath); if (err) { kfree(xspath); return err; -- cgit v1.2.3 From 4a8c31a1c6f526ec96a35e613f2a71e26ffbd7dd Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Sun, 24 Feb 2019 10:17:27 -0500 Subject: xen/blkback: rework connect_ring() to avoid inconsistent xenstore 'ring-page-order' set by malicious blkfront The xenstore 'ring-page-order' is used globally for each blkback queue and therefore should be read from xenstore only once. However, it is obtained in read_per_ring_refs() which might be called multiple times during the initialization of each blkback queue. If the blkfront is malicious and the 'ring-page-order' is set in different value by blkfront every time before blkback reads it, this may end up at the "WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));" in xen_blkif_disconnect() when frontend is destroyed. This patch reworks connect_ring() to read xenstore 'ring-page-order' only once. Signed-off-by: Dongli Zhang Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 72 +++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index a4aadac772e5..24896ffb04ed 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -926,7 +926,7 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) int err, i, j; struct xen_blkif *blkif = ring->blkif; struct xenbus_device *dev = blkif->be->dev; - unsigned int ring_page_order, nr_grefs, evtchn; + unsigned int nr_grefs, evtchn; err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u", &evtchn); @@ -936,43 +936,42 @@ static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir) return err; } - err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", - &ring_page_order); - if (err != 1) { - err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]); + nr_grefs = blkif->nr_ring_pages; + + if (unlikely(!nr_grefs)) { + WARN_ON(true); + return -EINVAL; + } + + for (i = 0; i < nr_grefs; i++) { + char ring_ref_name[RINGREF_NAME_LEN]; + + snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i); + err = xenbus_scanf(XBT_NIL, dir, ring_ref_name, + "%u", &ring_ref[i]); + if (err != 1) { + if (nr_grefs == 1) + break; + err = -EINVAL; - xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir); + xenbus_dev_fatal(dev, err, "reading %s/%s", + dir, ring_ref_name); return err; } - nr_grefs = 1; - } else { - unsigned int i; + } + + if (err != 1) { + WARN_ON(nr_grefs != 1); - if (ring_page_order > xen_blkif_max_ring_order) { + err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", + &ring_ref[0]); + if (err != 1) { err = -EINVAL; - xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d", - dir, 
From 6ce59025f1182125e75c8d121daf44056b65dd1f Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 18 Mar 2019 08:08:43 -0600
Subject: paride/pf: cleanup queues when detection fails

The driver allocates queues for all the units it potentially supports.
But if we fail to detect any drives, then we fail loading the module
without cleaning up those queues. This is now evident with the switch to
blk-mq, though the bug has been there forever as far as I can tell.

Also fix cleanup through regular module exit.

Reported-by: Randy Dunlap
Tested-by: Randy Dunlap
Signed-off-by: Jens Axboe
---
 drivers/block/paride/pf.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'drivers/block')

diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c
index e92e7a8eeeb2..103b617cdc31 100644
--- a/drivers/block/paride/pf.c
+++ b/drivers/block/paride/pf.c
@@ -761,8 +761,12 @@ static int pf_detect(void)
                 return 0;

         printk("%s: No ATAPI disk detected\n", name);
-        for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++)
+        for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
+                blk_cleanup_queue(pf->disk->queue);
+                pf->disk->queue = NULL;
+                blk_mq_free_tag_set(&pf->tag_set);
                 put_disk(pf->disk);
+        }
         pi_unregister_driver(par_drv);
         return -1;
 }
@@ -1047,13 +1051,15 @@ static void __exit pf_exit(void)
         int unit;
         unregister_blkdev(major, name);
         for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) {
-                if (!pf->present)
-                        continue;
-                del_gendisk(pf->disk);
+                if (pf->present)
+                        del_gendisk(pf->disk);
+
                 blk_cleanup_queue(pf->disk->queue);
                 blk_mq_free_tag_set(&pf->tag_set);
                 put_disk(pf->disk);
-                pi_release(pf->pi);
+
+                if (pf->present)
+                        pi_release(pf->pi);
         }
 }
--
cgit v1.2.3
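
The pf change above follows the usual rule for probe/detect failure paths: undo every per-unit allocation before returning an error. A compact user-space model of that shape is sketched below; the unit struct, the allocation itself and NR_UNITS are illustrative stand-ins for the pf unit array and its blk-mq resources.

#include <stdio.h>
#include <stdlib.h>

#define NR_UNITS 4    /* stands in for PF_UNITS */

struct unit {
    void *queue;    /* stands in for the per-unit queue and tag set */
};

static int detect_drive(int unit)
{
    (void)unit;
    return 0;    /* pretend no drive answered */
}

static int detect_all(struct unit *units)
{
    int i, found = 0;

    /* Allocate per-unit resources up front, as the driver does. */
    for (i = 0; i < NR_UNITS; i++)
        units[i].queue = malloc(64);

    for (i = 0; i < NR_UNITS; i++)
        found += detect_drive(i);

    if (found)
        return 0;

    /* Nothing detected: release every queue before failing, mirroring the
     * blk_cleanup_queue()/blk_mq_free_tag_set()/put_disk() loop above. */
    for (i = 0; i < NR_UNITS; i++) {
        free(units[i].queue);
        units[i].queue = NULL;
    }
    return -1;
}

int main(void)
{
    struct unit units[NR_UNITS];

    if (detect_all(units)) {
        fprintf(stderr, "no drives found, resources released\n");
        return 1;
    }
    return 0;
}
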
From 81b74ac68c28fddb3589ad5d4d5e587baf4bb781 Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Mon, 18 Mar 2019 08:10:32 -0600
Subject: paride/pcd: cleanup queues when detection fails

The driver allocates queues for all the units it potentially supports.
But if we fail to detect any drives, then we fail loading the module
without cleaning up those queues. This is now evident with the switch to
blk-mq, though the bug has been there forever as far as I can tell.

Also fix cleanup through regular module exit.

Reported-by: Randy Dunlap
Tested-by: Randy Dunlap
Signed-off-by: Jens Axboe
---
 drivers/block/paride/pcd.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers/block')

diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 96670eefaeb2..377a694dc228 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -749,8 +749,12 @@ static int pcd_detect(void)
                 return 0;

         printk("%s: No CD-ROM drive found\n", name);
-        for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++)
+        for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) {
+                blk_cleanup_queue(cd->disk->queue);
+                cd->disk->queue = NULL;
+                blk_mq_free_tag_set(&cd->tag_set);
                 put_disk(cd->disk);
+        }
         pi_unregister_driver(par_drv);
         return -1;
 }
--
cgit v1.2.3

From f7c8a4120eedf24c36090b7542b179ff7a649219 Mon Sep 17 00:00:00 2001
From: Dongli Zhang
Date: Mon, 18 Mar 2019 20:23:17 +0800
Subject: loop: access lo_backing_file only when the loop device is Lo_bound

Commit 758a58d0bc67 ("loop: set GENHD_FL_NO_PART_SCAN after
blkdev_reread_part()") separates "lo->lo_backing_file = NULL" and
"lo->lo_state = Lo_unbound" into different critical regions protected by
loop_ctl_mutex. However, there is below race that the NULL
lo->lo_backing_file would be accessed when the backend of a loop is
another loop device, e.g., loop0's backend is a file, while loop1's
backend is loop0.

loop0's backend is file             loop1's backend is loop0

__loop_clr_fd()
  mutex_lock(&loop_ctl_mutex);
  lo->lo_backing_file = NULL; --> set to NULL
  mutex_unlock(&loop_ctl_mutex);
                                    loop_set_fd()
                                      mutex_lock_killable(&loop_ctl_mutex);
                                      loop_validate_file()
                                        f = l->lo_backing_file; --> NULL access
                                            if loop0 is not Lo_unbound
  mutex_lock(&loop_ctl_mutex);
  lo->lo_state = Lo_unbound;
  mutex_unlock(&loop_ctl_mutex);

lo->lo_backing_file should be accessed only when the loop device is
Lo_bound.

In fact, the problem has been introduced already in commit 7ccd0791d985
("loop: Push loop_ctl_mutex down into loop_clr_fd()") after which
loop_validate_file() could see devices in Lo_rundown state with which it
did not count. It was harmless at that point but still.

Fixes: 7ccd0791d985 ("loop: Push loop_ctl_mutex down into loop_clr_fd()")
Reported-by: syzbot+9bdc1adc1c55e7fe765b@syzkaller.appspotmail.com
Signed-off-by: Dongli Zhang
Reviewed-by: Jan Kara
Signed-off-by: Jens Axboe
---
 drivers/block/loop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/block')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 1e6edd568214..bf1c61cab8eb 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -656,7 +656,7 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
                         return -EBADF;

                 l = f->f_mapping->host->i_bdev->bd_disk->private_data;
-                if (l->lo_state == Lo_unbound) {
+                if (l->lo_state != Lo_bound) {
                         return -EINVAL;
                 }
                 f = l->lo_backing_file;
--
cgit v1.2.3
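
The loop fix above narrows the check from "not Lo_unbound" to "is Lo_bound", so a device caught mid-teardown (Lo_rundown, with lo_backing_file already cleared) is rejected rather than dereferenced. A small sketch of that state check follows, using a simplified state enum and a plain pthread mutex in place of loop_ctl_mutex; none of these names are the real loop driver symbols.

#include <pthread.h>
#include <stdio.h>

enum lo_state { LO_UNBOUND, LO_BOUND, LO_RUNDOWN };    /* simplified states */

struct loop_dev {
    enum lo_state state;
    const char *backing_file;    /* NULL while unbound or being torn down */
};

static pthread_mutex_t ctl_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Only touch backing_file when the device is fully bound; any other state,
 * including the transient teardown state, is treated as invalid. */
static int validate_backend(const struct loop_dev *l)
{
    int ret = 0;

    pthread_mutex_lock(&ctl_mutex);
    if (l->state != LO_BOUND)
        ret = -1;
    else
        printf("backend is %s\n", l->backing_file);
    pthread_mutex_unlock(&ctl_mutex);

    return ret;
}

int main(void)
{
    struct loop_dev mid_teardown = { .state = LO_RUNDOWN, .backing_file = NULL };

    return validate_backend(&mid_teardown) ? 0 : 1;    /* expect rejection */
}
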
From 16d80c54ad42c573a897ae7bcf5a9816be54e6fe Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Fri, 15 Mar 2019 14:50:04 +0100
Subject: rbd: set io_min, io_opt and discard_granularity to alloc_size

Now that we have alloc_size that controls our discard behavior, it
doesn't make sense to have these set to object (set) size.

alloc_size defaults to 64k, but because discard_granularity is likely
4M, only ranges that are equal to or bigger than 4M can be considered
during fstrim. A smaller io_min is also more likely to be met, resulting
in fewer deferred writes on bluestore OSDs.

Signed-off-by: Ilya Dryomov
Reviewed-by: Jason Dillaman
---
 drivers/block/rbd.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/block')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4ba967d65cf9..3b2c9289dccb 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -833,7 +833,7 @@ static int parse_rbd_opts_token(char *c, void *private)
                 pctx->opts->queue_depth = intval;
                 break;
         case Opt_alloc_size:
-                if (intval < 1) {
+                if (intval < SECTOR_SIZE) {
                         pr_err("alloc_size out of range\n");
                         return -EINVAL;
                 }
@@ -4203,12 +4203,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
         q->limits.max_sectors = queue_max_hw_sectors(q);
         blk_queue_max_segments(q, USHRT_MAX);
         blk_queue_max_segment_size(q, UINT_MAX);
-        blk_queue_io_min(q, objset_bytes);
-        blk_queue_io_opt(q, objset_bytes);
+        blk_queue_io_min(q, rbd_dev->opts->alloc_size);
+        blk_queue_io_opt(q, rbd_dev->opts->alloc_size);

         if (rbd_dev->opts->trim) {
                 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
-                q->limits.discard_granularity = objset_bytes;
+                q->limits.discard_granularity = rbd_dev->opts->alloc_size;
                 blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT);
                 blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT);
         }
--
cgit v1.2.3

From 9d4a227f6ef189cf37eb22641f6ee788b7dc41bb Mon Sep 17 00:00:00 2001
From: Ilya Dryomov
Date: Wed, 20 Mar 2019 10:58:05 +0100
Subject: rbd: drop wait_for_latest_osdmap()

Signed-off-by: Ilya Dryomov
Reviewed-by: Jason Dillaman
---
 drivers/block/rbd.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

(limited to 'drivers/block')

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 3b2c9289dccb..2210c1b9491b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -924,23 +924,6 @@ static void rbd_put_client(struct rbd_client *rbdc)
         kref_put(&rbdc->kref, rbd_client_release);
 }

-static int wait_for_latest_osdmap(struct ceph_client *client)
-{
-        u64 newest_epoch;
-        int ret;
-
-        ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch);
-        if (ret)
-                return ret;
-
-        if (client->osdc.osdmap->epoch >= newest_epoch)
-                return 0;
-
-        ceph_osdc_maybe_request_map(&client->osdc);
-        return ceph_monc_wait_osdmap(&client->monc, newest_epoch,
-                                     client->options->mount_timeout);
-}
-
 /*
  * Get a ceph client with specific addr and configuration, if one does
  * not exist create it. Either way, ceph_opts is consumed by this
@@ -960,7 +943,8 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts)
                  * Using an existing client. Make sure ->pg_pools is up to
                  * date before we look up the pool id in do_rbd_add().
                  */
-                ret = wait_for_latest_osdmap(rbdc->client);
+                ret = ceph_wait_for_latest_osdmap(rbdc->client,
+                                        rbdc->client->options->mount_timeout);
                 if (ret) {
                         rbd_warn(NULL, "failed to get latest osdmap: %d", ret);
                         rbd_put_client(rbdc);
--
cgit v1.2.3
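
Returning to the rbd alloc_size patch earlier in this series: discard_granularity determines how much of a trim request survives alignment, which is why lowering it from the object-set size to alloc_size lets far smaller ranges reach the OSDs during fstrim. The arithmetic can be sketched as follows; usable_discard_bytes() is purely illustrative and not an rbd or block-layer function.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Return how many bytes of [start, start + len) remain after trimming the
 * range to granularity-aligned boundaries; anything smaller than one
 * granule is dropped entirely. Illustrative only. */
static uint64_t usable_discard_bytes(uint64_t start, uint64_t len,
                                     uint64_t granularity)
{
    uint64_t first = (start + granularity - 1) / granularity * granularity;
    uint64_t last = (start + len) / granularity * granularity;

    return last > first ? last - first : 0;
}

int main(void)
{
    /* A 1M trim request that starts 10k into the device. */
    uint64_t start = 10 * 1024, len = 1024 * 1024;

    /* Old behaviour: granularity was the 4M object-set size, so nothing of
     * this request survives alignment. New behaviour: the 64k alloc_size
     * default lets most of it through. */
    printf("4M granularity:  %" PRIu64 " bytes\n",
           usable_discard_bytes(start, len, 4ull << 20));
    printf("64k granularity: %" PRIu64 " bytes\n",
           usable_discard_bytes(start, len, 64ull << 10));
    return 0;
}

Running it shows 0 bytes eligible at 4M granularity versus 983040 bytes at 64k for the same request.
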