From d9dd73087a8b6d78d3bf8411620306f2313cdbae Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 11 Nov 2019 11:39:22 +0900 Subject: block: Enhance blk_revalidate_disk_zones() For ZBC and ZAC zoned devices, the scsi driver revalidation processing implemented by sd_revalidate_disk() includes a call to sd_zbc_read_zones() which executes a full disk zone report used to check that all zones of the disk are the same size. This processing is followed by a call to blk_revalidate_disk_zones(), used to initialize the device request queue zone bitmaps (zone type and zone write lock bitmaps). To do so, blk_revalidate_disk_zones() also executes a full device zone report to obtain zone types. As a result, the entire zoned block device revalidation process includes two full device zone report. By moving the zone size checks into blk_revalidate_disk_zones(), this process can be optimized to a single full device zone report, leading to shorter device scan and revalidation times. This patch implements this optimization, reducing the original full device zone report implemented in sd_zbc_check_zones() to a single, small, report zones command execution to obtain the size of the first zone of the device. Checks whether all zones of the device are the same size as the first zone size are moved to the generic blk_check_zone() function called from blk_revalidate_disk_zones(). This optimization also has the following benefits: 1) fewer memory allocations in the scsi layer during disk revalidation as the potentailly large buffer for zone report execution is not needed. 2) Implement zone checks in a generic manner, reducing the burden on device driver which only need to obtain the zone size and check that this size is a power of 2 number of LBAs. Any new type of zoned block device will benefit from this. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-zoned.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 481eaf7d04d4..dae787f67019 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -448,6 +448,58 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q) q->seq_zones_wlock = NULL; } +/* + * Helper function to check the validity of zones of a zoned block device. + */ +static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, + sector_t *sector) +{ + struct request_queue *q = disk->queue; + sector_t zone_sectors = blk_queue_zone_sectors(q); + sector_t capacity = get_capacity(disk); + + /* + * All zones must have the same size, with the exception on an eventual + * smaller last zone. + */ + if (zone->start + zone_sectors < capacity && + zone->len != zone_sectors) { + pr_warn("%s: Invalid zoned device with non constant zone size\n", + disk->disk_name); + return false; + } + + if (zone->start + zone->len >= capacity && + zone->len > zone_sectors) { + pr_warn("%s: Invalid zoned device with larger last zone size\n", + disk->disk_name); + return false; + } + + /* Check for holes in the zone report */ + if (zone->start != *sector) { + pr_warn("%s: Zone gap at sectors %llu..%llu\n", + disk->disk_name, *sector, zone->start); + return false; + } + + /* Check zone type */ + switch (zone->type) { + case BLK_ZONE_TYPE_CONVENTIONAL: + case BLK_ZONE_TYPE_SEQWRITE_REQ: + case BLK_ZONE_TYPE_SEQWRITE_PREF: + break; + default: + pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", + disk->disk_name, (int)zone->type, zone->start); + return false; + } + + *sector += zone->len; + + return true; +} + /** * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps * @disk: Target disk @@ -497,7 +549,10 @@ int blk_revalidate_disk_zones(struct gendisk *disk) if (!seq_zones_bitmap) goto out; - /* Get zone information and initialize seq_zones_bitmap */ + /* + * Get zone information to check the zones and initialize + * seq_zones_bitmap. + */ rep_nr_zones = nr_zones; zones = blk_alloc_zones(&rep_nr_zones); if (!zones) @@ -511,11 +566,14 @@ int blk_revalidate_disk_zones(struct gendisk *disk) if (!nrz) break; for (i = 0; i < nrz; i++) { + if (!blk_zone_valid(disk, &zones[i], §or)) { + ret = -ENODEV; + goto out; + } if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) set_bit(z, seq_zones_bitmap); z++; } - sector += nrz * blk_queue_zone_sectors(q); } if (WARN_ON(z != nr_zones)) { -- cgit v1.2.3 From c98c3d09fca41323765af0dc7926b150cf29ebff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Nov 2019 11:39:23 +0900 Subject: block: cleanup the !zoned case in blk_revalidate_disk_zones blk_revalidate_disk_zones is never called for non-zoned devices. Just return early and warn instead of trying to handle this case. Signed-off-by: Christoph Hellwig Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- block/blk-zoned.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index dae787f67019..523a28d7a15c 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -520,6 +520,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk) sector_t sector = 0; int ret = 0; + if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) + return -EIO; + /* * BIO based queues do not use a scheduler so only q->nr_zones * needs to be updated so that the sysfs exposed value is correct. @@ -535,10 +538,8 @@ int blk_revalidate_disk_zones(struct gendisk *disk) */ noio_flag = memalloc_noio_save(); - if (!blk_queue_is_zoned(q) || !nr_zones) { - nr_zones = 0; + if (!nr_zones) goto update; - } /* Allocate bitmaps */ ret = -ENOMEM; -- cgit v1.2.3 From ceeb373aa6b9eb75ed3278d4b3ff2318c304e70c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 11 Nov 2019 11:39:24 +0900 Subject: block: Simplify report zones execution All kernel users of blkdev_report_zones() as well as applications use through ioctl(BLKZONEREPORT) expect to potentially get less zone descriptors than requested. As such, the use of the internal report zones command execution loop implemented by blk_report_zones() is not necessary and can even be harmful to performance by causing the execution of inefficient small zones report command to service the reminder of a requested zone array. This patch removes blk_report_zones(), simplifying the code. Also remove a now incorrect comment in dm_blk_report_zones(). Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Javier Gonzalez Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- block/blk-zoned.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 523a28d7a15c..ea4e086ba00e 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -119,31 +119,6 @@ static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep) return true; } -static int blk_report_zones(struct gendisk *disk, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) -{ - struct request_queue *q = disk->queue; - unsigned int z = 0, n, nrz = *nr_zones; - sector_t capacity = get_capacity(disk); - int ret; - - while (z < nrz && sector < capacity) { - n = nrz - z; - ret = disk->fops->report_zones(disk, sector, &zones[z], &n); - if (ret) - return ret; - if (!n) - break; - sector += blk_queue_zone_sectors(q) * n; - z += n; - } - - WARN_ON(z > *nr_zones); - *nr_zones = z; - - return 0; -} - /** * blkdev_report_zones - Get zones information * @bdev: Target block device @@ -164,6 +139,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, unsigned int *nr_zones) { struct request_queue *q = bdev_get_queue(bdev); + struct gendisk *disk = bdev->bd_disk; unsigned int i, nrz; int ret; @@ -175,7 +151,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, * report_zones method. If it does not have one defined, the device * driver has a bug. So warn about that. */ - if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones)) + if (WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; if (!*nr_zones || sector >= bdev->bd_part->nr_sects) { @@ -185,8 +161,8 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, nrz = min(*nr_zones, __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector)); - ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector, - zones, &nrz); + ret = disk->fops->report_zones(disk, get_start_sect(bdev) + sector, + zones, &nrz); if (ret) return ret; @@ -561,7 +537,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) while (z < nr_zones) { nrz = min(nr_zones - z, rep_nr_zones); - ret = blk_report_zones(disk, sector, zones, &nrz); + ret = disk->fops->report_zones(disk, sector, zones, &nrz); if (ret) goto out; if (!nrz) -- cgit v1.2.3 From 5eac3eb30c9ab9ee7fe2bd9aa9db6373cabb77f8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 11 Nov 2019 11:39:25 +0900 Subject: block: Remove partition support for zoned block devices No known partitioning tool supports zoned block devices, especially the host managed flavor with strong sequential write constraints. Furthermore, there are also no known user nor use cases for partitioned zoned block devices. This patch removes partition device creation for zoned block devices, which allows simplifying the processing of zone commands for zoned block devices. A warning is added if a partition table is found on the device. For report zones operations no zone sector information remapping is necessary anymore, simplifying the code. Of note is that remapping of zone reports for DM targets is still necessary as done by dm_remap_zone_report(). Similarly, remaping of a zone reset bio is not necessary anymore. Testing for the applicability of the zone reset all request also becomes simpler and only needs to check that the number of sectors of the requested zone range is equal to the disk capacity. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +--- block/blk-zoned.c | 62 +++++++-------------------------------- block/partition-generic.c | 74 ++++++----------------------------------------- 3 files changed, 21 insertions(+), 121 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index f0d82227a2fc..a1e228752083 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -851,11 +851,7 @@ static inline int blk_partition_remap(struct bio *bio) if (unlikely(bio_check_ro(bio, p))) goto out; - /* - * Zone management bios do not have a sector count but they do have - * a start sector filled out and need to be remapped. - */ - if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) { + if (bio_sectors(bio)) { if (bio_check_eod(bio, part_nr_sects_read(p))) goto out; bio->bi_iter.bi_sector += p->start_sect; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ea4e086ba00e..ae665e490858 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -93,32 +93,10 @@ unsigned int blkdev_nr_zones(struct block_device *bdev) if (!blk_queue_is_zoned(q)) return 0; - return __blkdev_nr_zones(q, bdev->bd_part->nr_sects); + return __blkdev_nr_zones(q, get_capacity(bdev->bd_disk)); } EXPORT_SYMBOL_GPL(blkdev_nr_zones); -/* - * Check that a zone report belongs to this partition, and if yes, fix its start - * sector and write pointer and return true. Return false otherwise. - */ -static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep) -{ - sector_t offset = get_start_sect(bdev); - - if (rep->start < offset) - return false; - - rep->start -= offset; - if (rep->start + rep->len > bdev->bd_part->nr_sects) - return false; - - if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL) - rep->wp = rep->start + rep->len; - else - rep->wp -= offset; - return true; -} - /** * blkdev_report_zones - Get zones information * @bdev: Target block device @@ -140,8 +118,7 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, { struct request_queue *q = bdev_get_queue(bdev); struct gendisk *disk = bdev->bd_disk; - unsigned int i, nrz; - int ret; + sector_t capacity = get_capacity(disk); if (!blk_queue_is_zoned(q)) return -EOPNOTSUPP; @@ -154,27 +131,14 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, if (WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; - if (!*nr_zones || sector >= bdev->bd_part->nr_sects) { + if (!*nr_zones || sector >= capacity) { *nr_zones = 0; return 0; } - nrz = min(*nr_zones, - __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector)); - ret = disk->fops->report_zones(disk, get_start_sect(bdev) + sector, - zones, &nrz); - if (ret) - return ret; + *nr_zones = min(*nr_zones, __blkdev_nr_zones(q, capacity - sector)); - for (i = 0; i < nrz; i++) { - if (!blkdev_report_zone(bdev, zones)) - break; - zones++; - } - - *nr_zones = i; - - return 0; + return disk->fops->report_zones(disk, sector, zones, nr_zones); } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -185,15 +149,11 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) return false; - if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part)) - return false; /* - * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is - * the entire disk, that is, if the blocks device start offset is 0 and - * its capacity is the same as the entire disk. + * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors + * of the applicable zone range is the entire disk. */ - return get_start_sect(bdev) == 0 && - part_nr_sects_read(bdev->bd_part) == get_capacity(bdev->bd_disk); + return !sector && nr_sectors == get_capacity(bdev->bd_disk); } /** @@ -218,6 +178,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, { struct request_queue *q = bdev_get_queue(bdev); sector_t zone_sectors = blk_queue_zone_sectors(q); + sector_t capacity = get_capacity(bdev->bd_disk); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; int ret; @@ -231,7 +192,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, if (!op_is_zone_mgmt(op)) return -EOPNOTSUPP; - if (!nr_sectors || end_sector > bdev->bd_part->nr_sects) + if (!nr_sectors || end_sector > capacity) /* Out of range */ return -EINVAL; @@ -239,8 +200,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, if (sector & (zone_sectors - 1)) return -EINVAL; - if ((nr_sectors & (zone_sectors - 1)) && - end_sector != bdev->bd_part->nr_sects) + if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity) return -EINVAL; while (sector < end_sector) { diff --git a/block/partition-generic.c b/block/partition-generic.c index aee643ce13d1..31bff3fb28af 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -459,56 +459,6 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev) return 0; } -static bool part_zone_aligned(struct gendisk *disk, - struct block_device *bdev, - sector_t from, sector_t size) -{ - unsigned int zone_sectors = bdev_zone_sectors(bdev); - - /* - * If this function is called, then the disk is a zoned block device - * (host-aware or host-managed). This can be detected even if the - * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not - * set). In this case, however, only host-aware devices will be seen - * as a block device is not created for host-managed devices. Without - * zoned block device support, host-aware drives can still be used as - * regular block devices (no zone operation) and their zone size will - * be reported as 0. Allow this case. - */ - if (!zone_sectors) - return true; - - /* - * Check partition start and size alignement. If the drive has a - * smaller last runt zone, ignore it and allow the partition to - * use it. Check the zone size too: it should be a power of 2 number - * of sectors. - */ - if (WARN_ON_ONCE(!is_power_of_2(zone_sectors))) { - u32 rem; - - div_u64_rem(from, zone_sectors, &rem); - if (rem) - return false; - if ((from + size) < get_capacity(disk)) { - div_u64_rem(size, zone_sectors, &rem); - if (rem) - return false; - } - - } else { - - if (from & (zone_sectors - 1)) - return false; - if ((from + size) < get_capacity(disk) && - (size & (zone_sectors - 1))) - return false; - - } - - return true; -} - int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { struct parsed_partitions *state = NULL; @@ -544,6 +494,14 @@ rescan: } return -EIO; } + + /* Partitions are not supported on zoned block devices */ + if (bdev_is_zoned(bdev)) { + pr_warn("%s: ignoring partition table on zoned block device\n", + disk->disk_name); + goto out; + } + /* * If any partition code tried to read beyond EOD, try * unlocking native capacity even if partition table is @@ -607,21 +565,6 @@ rescan: } } - /* - * On a zoned block device, partitions should be aligned on the - * device zone size (i.e. zone boundary crossing not allowed). - * Otherwise, resetting the write pointer of the last zone of - * one partition may impact the following partition. - */ - if (bdev_is_zoned(bdev) && - !part_zone_aligned(disk, bdev, from, size)) { - printk(KERN_WARNING - "%s: p%d start %llu+%llu is not zone aligned\n", - disk->disk_name, p, (unsigned long long) from, - (unsigned long long) size); - continue; - } - part = add_partition(disk, p, from, size, state->parts[p].flags, &state->parts[p].info); @@ -635,6 +578,7 @@ rescan: md_autodetect_dev(part_to_dev(part)->devt); #endif } +out: free_partitions(state); return 0; } -- cgit v1.2.3 From d41003513e61dd9d4974cb441d30b63650b85654 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 11 Nov 2019 11:39:30 +0900 Subject: block: rework zone reporting Avoid the need to allocate a potentially large array of struct blk_zone in the block layer by switching the ->report_zones method interface to a callback model. Now the caller simply supplies a callback that is executed on each reported zone, and private data for it. Signed-off-by: Christoph Hellwig Signed-off-by: Shin'ichiro Kawasaki Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-zoned.c | 253 +++++++++++++++++++++--------------------------------- 1 file changed, 97 insertions(+), 156 deletions(-) (limited to 'block') diff --git a/block/blk-zoned.c b/block/blk-zoned.c index ae665e490858..6fad6f3f6980 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -101,44 +101,35 @@ EXPORT_SYMBOL_GPL(blkdev_nr_zones); * blkdev_report_zones - Get zones information * @bdev: Target block device * @sector: Sector from which to report zones - * @zones: Array of zone structures where to return the zones information - * @nr_zones: Number of zone structures in the zone array + * @nr_zones: Maximum number of zones to report + * @cb: Callback function called for each reported zone + * @data: Private data for the callback * * Description: - * Get zone information starting from the zone containing @sector. - * The number of zone information reported may be less than the number - * requested by @nr_zones. The number of zones actually reported is - * returned in @nr_zones. - * The caller must use memalloc_noXX_save/restore() calls to control - * memory allocations done within this function (zone array and command - * buffer allocation by the device driver). + * Get zone information starting from the zone containing @sector for at most + * @nr_zones, and call @cb for each zone reported by the device. + * To report all zones in a device starting from @sector, the BLK_ALL_ZONES + * constant can be passed to @nr_zones. + * Returns the number of zones reported by the device, or a negative errno + * value in case of failure. + * + * Note: The caller must use memalloc_noXX_save/restore() calls to control + * memory allocations done within this function. */ int blkdev_report_zones(struct block_device *bdev, sector_t sector, - struct blk_zone *zones, unsigned int *nr_zones) + unsigned int nr_zones, report_zones_cb cb, void *data) { - struct request_queue *q = bdev_get_queue(bdev); struct gendisk *disk = bdev->bd_disk; sector_t capacity = get_capacity(disk); - if (!blk_queue_is_zoned(q)) - return -EOPNOTSUPP; - - /* - * A block device that advertized itself as zoned must have a - * report_zones method. If it does not have one defined, the device - * driver has a bug. So warn about that. - */ - if (WARN_ON_ONCE(!disk->fops->report_zones)) + if (!blk_queue_is_zoned(bdev_get_queue(bdev)) || + WARN_ON_ONCE(!disk->fops->report_zones)) return -EOPNOTSUPP; - if (!*nr_zones || sector >= capacity) { - *nr_zones = 0; + if (!nr_zones || sector >= capacity) return 0; - } - *nr_zones = min(*nr_zones, __blkdev_nr_zones(q, capacity - sector)); - - return disk->fops->report_zones(disk, sector, zones, nr_zones); + return disk->fops->report_zones(disk, sector, nr_zones, cb, data); } EXPORT_SYMBOL_GPL(blkdev_report_zones); @@ -232,6 +223,20 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, } EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); +struct zone_report_args { + struct blk_zone __user *zones; +}; + +static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx, + void *data) +{ + struct zone_report_args *args = data; + + if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone))) + return -EFAULT; + return 0; +} + /* * BLKREPORTZONE ioctl processing. * Called from blkdev_ioctl. @@ -240,9 +245,9 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; + struct zone_report_args args; struct request_queue *q; struct blk_zone_report rep; - struct blk_zone *zones; int ret; if (!argp) @@ -264,32 +269,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, if (!rep.nr_zones) return -EINVAL; - rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones); - - zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone), - GFP_KERNEL | __GFP_ZERO); - if (!zones) - return -ENOMEM; - - ret = blkdev_report_zones(bdev, rep.sector, zones, &rep.nr_zones); - if (ret) - goto out; - - if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) { - ret = -EFAULT; - goto out; - } - - if (rep.nr_zones) { - if (copy_to_user(argp + sizeof(struct blk_zone_report), zones, - sizeof(struct blk_zone) * rep.nr_zones)) - ret = -EFAULT; - } + args.zones = argp + sizeof(struct blk_zone_report); + ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones, + blkdev_copy_zone_to_user, &args); + if (ret < 0) + return ret; - out: - kvfree(zones); - - return ret; + rep.nr_zones = ret; + if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) + return -EFAULT; + return 0; } /* @@ -351,31 +340,6 @@ static inline unsigned long *blk_alloc_zone_bitmap(int node, GFP_NOIO, node); } -/* - * Allocate an array of struct blk_zone to get nr_zones zone information. - * The allocated array may be smaller than nr_zones. - */ -static struct blk_zone *blk_alloc_zones(unsigned int *nr_zones) -{ - struct blk_zone *zones; - size_t nrz = min(*nr_zones, BLK_ZONED_REPORT_MAX_ZONES); - - /* - * GFP_KERNEL here is meaningless as the caller task context has - * the PF_MEMALLOC_NOIO flag set in blk_revalidate_disk_zones() - * with memalloc_noio_save(). - */ - zones = kvcalloc(nrz, sizeof(struct blk_zone), GFP_KERNEL); - if (!zones) { - *nr_zones = 0; - return NULL; - } - - *nr_zones = nrz; - - return zones; -} - void blk_queue_free_zone_bitmaps(struct request_queue *q) { kfree(q->seq_zones_bitmap); @@ -384,12 +348,21 @@ void blk_queue_free_zone_bitmaps(struct request_queue *q) q->seq_zones_wlock = NULL; } +struct blk_revalidate_zone_args { + struct gendisk *disk; + unsigned long *seq_zones_bitmap; + unsigned long *seq_zones_wlock; + sector_t sector; +}; + /* * Helper function to check the validity of zones of a zoned block device. */ -static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, - sector_t *sector) +static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx, + void *data) { + struct blk_revalidate_zone_args *args = data; + struct gendisk *disk = args->disk; struct request_queue *q = disk->queue; sector_t zone_sectors = blk_queue_zone_sectors(q); sector_t capacity = get_capacity(disk); @@ -409,14 +382,14 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, zone->len > zone_sectors) { pr_warn("%s: Invalid zoned device with larger last zone size\n", disk->disk_name); - return false; + return -ENODEV; } /* Check for holes in the zone report */ - if (zone->start != *sector) { + if (zone->start != args->sector) { pr_warn("%s: Zone gap at sectors %llu..%llu\n", - disk->disk_name, *sector, zone->start); - return false; + disk->disk_name, args->sector, zone->start); + return -ENODEV; } /* Check zone type */ @@ -428,12 +401,38 @@ static bool blk_zone_valid(struct gendisk *disk, struct blk_zone *zone, default: pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n", disk->disk_name, (int)zone->type, zone->start); - return false; + return -ENODEV; } - *sector += zone->len; + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) + set_bit(idx, args->seq_zones_bitmap); - return true; + args->sector += zone->len; + return 0; +} + +static int blk_update_zone_info(struct gendisk *disk, unsigned int nr_zones, + struct blk_revalidate_zone_args *args) +{ + /* + * Ensure that all memory allocations in this context are done as + * if GFP_NOIO was specified. + */ + unsigned int noio_flag = memalloc_noio_save(); + struct request_queue *q = disk->queue; + int ret; + + args->seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); + if (!args->seq_zones_wlock) + return -ENOMEM; + args->seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); + if (!args->seq_zones_bitmap) + return -ENOMEM; + + ret = disk->fops->report_zones(disk, 0, nr_zones, + blk_revalidate_zone_cb, args); + memalloc_noio_restore(noio_flag); + return ret; } /** @@ -449,11 +448,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk) { struct request_queue *q = disk->queue; unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk)); - unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL; - unsigned int i, rep_nr_zones = 0, z = 0, nrz; - struct blk_zone *zones = NULL; - unsigned int noio_flag; - sector_t sector = 0; + struct blk_revalidate_zone_args args = { .disk = disk }; int ret = 0; if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) @@ -468,82 +463,28 @@ int blk_revalidate_disk_zones(struct gendisk *disk) return 0; } - /* - * Ensure that all memory allocations in this context are done as - * if GFP_NOIO was specified. - */ - noio_flag = memalloc_noio_save(); - - if (!nr_zones) - goto update; - - /* Allocate bitmaps */ - ret = -ENOMEM; - seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones); - if (!seq_zones_wlock) - goto out; - seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones); - if (!seq_zones_bitmap) - goto out; - - /* - * Get zone information to check the zones and initialize - * seq_zones_bitmap. - */ - rep_nr_zones = nr_zones; - zones = blk_alloc_zones(&rep_nr_zones); - if (!zones) - goto out; - - while (z < nr_zones) { - nrz = min(nr_zones - z, rep_nr_zones); - ret = disk->fops->report_zones(disk, sector, zones, &nrz); - if (ret) - goto out; - if (!nrz) - break; - for (i = 0; i < nrz; i++) { - if (!blk_zone_valid(disk, &zones[i], §or)) { - ret = -ENODEV; - goto out; - } - if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL) - set_bit(z, seq_zones_bitmap); - z++; - } - } - - if (WARN_ON(z != nr_zones)) { - ret = -EIO; - goto out; - } + if (nr_zones) + ret = blk_update_zone_info(disk, nr_zones, &args); -update: /* * Install the new bitmaps, making sure the queue is stopped and * all I/Os are completed (i.e. a scheduler is not referencing the * bitmaps). */ blk_mq_freeze_queue(q); - q->nr_zones = nr_zones; - swap(q->seq_zones_wlock, seq_zones_wlock); - swap(q->seq_zones_bitmap, seq_zones_bitmap); - blk_mq_unfreeze_queue(q); - -out: - memalloc_noio_restore(noio_flag); - - kvfree(zones); - kfree(seq_zones_wlock); - kfree(seq_zones_bitmap); - - if (ret) { + if (ret >= 0) { + q->nr_zones = nr_zones; + swap(q->seq_zones_wlock, args.seq_zones_wlock); + swap(q->seq_zones_bitmap, args.seq_zones_bitmap); + ret = 0; + } else { pr_warn("%s: failed to revalidate zones\n", disk->disk_name); - blk_mq_freeze_queue(q); blk_queue_free_zone_bitmaps(q); - blk_mq_unfreeze_queue(q); } + blk_mq_unfreeze_queue(q); + kfree(args.seq_zones_wlock); + kfree(args.seq_zones_bitmap); return ret; } EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones); -- cgit v1.2.3