diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-05 10:51:40 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-08-05 10:51:40 -0700 |
commit | e0fc99e21e6e299673f1640105ac0c1d829c2d93 (patch) | |
tree | 4637b171164c3e653e9002b2bc962b32f05b5a07 | |
parent | 4834ce9d8e074bb7ae197632e0708219b9f389b5 (diff) | |
parent | f59589fc89665102923725e80e12f782d5f74f67 (diff) |
Merge tag 'for-5.9/drivers-20200803' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
- NVMe:
- ZNS support (Aravind, Keith, Matias, Niklas)
- Misc cleanups, optimizations, fixes (Baolin, Chaitanya, David,
Dongli, Max, Sagi)
- null_blk zone capacity support (Aravind)
- MD:
- raid5/6 fixes (ChangSyun)
- Warning fixes (Damien)
- raid5 stripe fixes (Guoqing, Song, Yufen)
- sysfs deadlock fix (Junxiao)
- raid10 deadlock fix (Vitaly)
- struct_size conversions (Gustavo)
- Set of bcache updates/fixes (Coly)
* tag 'for-5.9/drivers-20200803' of git://git.kernel.dk/linux-block: (117 commits)
md/raid5: Allow degraded raid6 to do rmw
md/raid5: Fix Force reconstruct-write io stuck in degraded raid5
raid5: don't duplicate code for different paths in handle_stripe
raid5-cache: hold spinlock instead of mutex in r5c_journal_mode_show
md: print errno in super_written
md/raid5: remove the redundant setting of STRIPE_HANDLE
md: register new md sysfs file 'uuid' read-only
md: fix max sectors calculation for super 1.0
nvme-loop: remove extra variable in create ctrl
nvme-loop: set ctrl state connecting after init
nvme-multipath: do not fall back to __nvme_find_path() for non-optimized paths
nvme-multipath: fix logic for non-optimized paths
nvme-rdma: fix controller reset hang during traffic
nvme-tcp: fix controller reset hang during traffic
nvmet: introduce the passthru Kconfig option
nvmet: introduce the passthru configfs interface
nvmet: Add passthru enable/disable helpers
nvmet: add passthru code to process commands
nvme: export nvme_find_get_ns() and nvme_put_ns()
nvme: introduce nvme_ctrl_get_by_path()
...
70 files changed, 3149 insertions, 826 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index ed8c14f161ee..2322eb748b38 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -273,6 +273,24 @@ Description: device ("host-aware" or "host-managed" zone model). For regular block devices, the value is always 0. +What: /sys/block/<disk>/queue/max_active_zones +Date: July 2020 +Contact: Niklas Cassel <niklas.cassel@wdc.com> +Description: + For zoned block devices (zoned attribute indicating + "host-managed" or "host-aware"), the sum of zones belonging to + any of the zone states: EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, + is limited by this value. If this value is 0, there is no limit. + +What: /sys/block/<disk>/queue/max_open_zones +Date: July 2020 +Contact: Niklas Cassel <niklas.cassel@wdc.com> +Description: + For zoned block devices (zoned attribute indicating + "host-managed" or "host-aware"), the sum of zones belonging to + any of the zone states: EXPLICIT OPEN or IMPLICIT OPEN, + is limited by this value. If this value is 0, there is no limit. + What: /sys/block/<disk>/queue/chunk_sectors Date: September 2016 Contact: Hannes Reinecke <hare@suse.com> diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst index d973d469ffc4..cc8781b96b4d 100644 --- a/Documentation/admin-guide/md.rst +++ b/Documentation/admin-guide/md.rst @@ -426,6 +426,10 @@ All md devices contain: The accepted values when writing to this file are ``ppl`` and ``resync``, used to enable and disable PPL. + uuid + This indicates the UUID of the array in the following format: + xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + As component devices are added to an md array, they appear in the ``md`` directory as new directories named:: diff --git a/Documentation/block/queue-sysfs.rst b/Documentation/block/queue-sysfs.rst index 6a8513af9201..f261a5c84170 100644 --- a/Documentation/block/queue-sysfs.rst +++ b/Documentation/block/queue-sysfs.rst @@ -117,6 +117,20 @@ Maximum number of elements in a DMA scatter/gather list with integrity data that will be submitted by the block layer core to the associated block driver. +max_active_zones (RO) +--------------------- +For zoned block devices (zoned attribute indicating "host-managed" or +"host-aware"), the sum of zones belonging to any of the zone states: +EXPLICIT OPEN, IMPLICIT OPEN or CLOSED, is limited by this value. +If this value is 0, there is no limit. + +max_open_zones (RO) +------------------- +For zoned block devices (zoned attribute indicating "host-managed" or +"host-aware"), the sum of zones belonging to any of the zone states: +EXPLICIT OPEN or IMPLICIT OPEN, is limited by this value. +If this value is 0, there is no limit. + max_sectors_kb (RW) ------------------- This is the maximum number of kilobytes that the block layer will allow diff --git a/block/Kconfig b/block/Kconfig index 9357d7302398..bbad5e8bbffe 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -86,9 +86,10 @@ config BLK_DEV_ZONED select MQ_IOSCHED_DEADLINE help Block layer zoned block device support. This option enables - support for ZAC/ZBC host-managed and host-aware zoned block devices. + support for ZAC/ZBC/ZNS host-managed and host-aware zoned block + devices. - Say yes here if you have a ZAC or ZBC storage device. + Say yes here if you have a ZAC, ZBC, or ZNS storage device. config BLK_DEV_THROTTLING bool "Block layer bio throttling support" diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index be67952e7be2..7dda709f3ccb 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -306,6 +306,16 @@ static ssize_t queue_nr_zones_show(struct request_queue *q, char *page) return queue_var_show(blk_queue_nr_zones(q), page); } +static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_max_open_zones(q), page); +} + +static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page) +{ + return queue_var_show(queue_max_active_zones(q), page); +} + static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { return queue_var_show((blk_queue_nomerges(q) << 1) | @@ -668,6 +678,16 @@ static struct queue_sysfs_entry queue_nr_zones_entry = { .show = queue_nr_zones_show, }; +static struct queue_sysfs_entry queue_max_open_zones_entry = { + .attr = {.name = "max_open_zones", .mode = 0444 }, + .show = queue_max_open_zones_show, +}; + +static struct queue_sysfs_entry queue_max_active_zones_entry = { + .attr = {.name = "max_active_zones", .mode = 0444 }, + .show = queue_max_active_zones_show, +}; + static struct queue_sysfs_entry queue_nomerges_entry = { .attr = {.name = "nomerges", .mode = 0644 }, .show = queue_nomerges_show, @@ -766,6 +786,8 @@ static struct attribute *queue_attrs[] = { &queue_nonrot_entry.attr, &queue_zoned_entry.attr, &queue_nr_zones_entry.attr, + &queue_max_open_zones_entry.attr, + &queue_max_active_zones_entry.attr, &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, @@ -793,6 +815,11 @@ static umode_t queue_attr_visible(struct kobject *kobj, struct attribute *attr, (!q->mq_ops || !q->mq_ops->timeout)) return 0; + if ((attr == &queue_max_open_zones_entry.attr || + attr == &queue_max_active_zones_entry.attr) && + !blk_queue_is_zoned(q)) + return 0; + return attr->mode; } diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 23831fa8701d..81152a260354 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -312,6 +312,7 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, return ret; rep.nr_zones = ret; + rep.flags = BLK_ZONE_REP_CAPACITY; if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) return -EFAULT; return 0; diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 6941062272e0..d04de10a63e4 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -45,6 +45,9 @@ static const guid_t prp_guids[] = { /* Thunderbolt GUID for WAKE_SUPPORTED: 6c501103-c189-4296-ba72-9bf5a26ebe5d */ GUID_INIT(0x6c501103, 0xc189, 0x4296, 0xba, 0x72, 0x9b, 0xf5, 0xa2, 0x6e, 0xbe, 0x5d), + /* Storage device needs D3 GUID: 5025030f-842f-4ab4-a561-99a5189762d0 */ + GUID_INIT(0x5025030f, 0x842f, 0x4ab4, + 0xa5, 0x61, 0x99, 0xa5, 0x18, 0x97, 0x62, 0xd0), }; /* ACPI _DSD data subnodes GUID: dbb8e3e6-5886-4ba6-8795-1319f52a966b */ diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 81b311c9d781..daed4a9c3436 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -49,6 +49,7 @@ struct nullb_device { unsigned long completion_nsec; /* time in ns to complete a request */ unsigned long cache_size; /* disk cache size in MB */ unsigned long zone_size; /* zone size in MB if device is zoned */ + unsigned long zone_capacity; /* zone capacity in MB if device is zoned */ unsigned int zone_nr_conv; /* number of conventional zones */ unsigned int submit_queues; /* number of submission queues */ unsigned int home_node; /* home node for the device */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 907c6858aec0..47a9dad880af 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -200,6 +200,10 @@ static unsigned long g_zone_size = 256; module_param_named(zone_size, g_zone_size, ulong, S_IRUGO); MODULE_PARM_DESC(zone_size, "Zone size in MB when block device is zoned. Must be power-of-two: Default: 256"); +static unsigned long g_zone_capacity; +module_param_named(zone_capacity, g_zone_capacity, ulong, 0444); +MODULE_PARM_DESC(zone_capacity, "Zone capacity in MB when block device is zoned. Can be less than or equal to zone size. Default: Zone size"); + static unsigned int g_zone_nr_conv; module_param_named(zone_nr_conv, g_zone_nr_conv, uint, 0444); MODULE_PARM_DESC(zone_nr_conv, "Number of conventional zones when block device is zoned. Default: 0"); @@ -341,6 +345,7 @@ NULLB_DEVICE_ATTR(mbps, uint, NULL); NULLB_DEVICE_ATTR(cache_size, ulong, NULL); NULLB_DEVICE_ATTR(zoned, bool, NULL); NULLB_DEVICE_ATTR(zone_size, ulong, NULL); +NULLB_DEVICE_ATTR(zone_capacity, ulong, NULL); NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) @@ -457,6 +462,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_badblocks, &nullb_device_attr_zoned, &nullb_device_attr_zone_size, + &nullb_device_attr_zone_capacity, &nullb_device_attr_zone_nr_conv, NULL, }; @@ -510,7 +516,8 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { - return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_nr_conv\n"); + return snprintf(page, PAGE_SIZE, + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -571,6 +578,7 @@ static struct nullb_device *null_alloc_dev(void) dev->use_per_node_hctx = g_use_per_node_hctx; dev->zoned = g_zoned; dev->zone_size = g_zone_size; + dev->zone_capacity = g_zone_capacity; dev->zone_nr_conv = g_zone_nr_conv; return dev; } diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index cc47606d8ffe..3d25c9ad2383 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -28,6 +28,15 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) return -EINVAL; } + if (!dev->zone_capacity) + dev->zone_capacity = dev->zone_size; + + if (dev->zone_capacity > dev->zone_size) { + pr_err("null_blk: zone capacity (%lu MB) larger than zone size (%lu MB)\n", + dev->zone_capacity, dev->zone_size); + return -EINVAL; + } + dev->zone_size_sects = dev->zone_size << ZONE_SIZE_SHIFT; dev->nr_zones = dev_size >> (SECTOR_SHIFT + ilog2(dev->zone_size_sects)); @@ -47,6 +56,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) zone->start = sector; zone->len = dev->zone_size_sects; + zone->capacity = zone->len; zone->wp = zone->start + zone->len; zone->type = BLK_ZONE_TYPE_CONVENTIONAL; zone->cond = BLK_ZONE_COND_NOT_WP; @@ -59,6 +69,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) zone->start = zone->wp = sector; zone->len = dev->zone_size_sects; + zone->capacity = dev->zone_capacity << ZONE_SIZE_SHIFT; zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ; zone->cond = BLK_ZONE_COND_EMPTY; @@ -185,6 +196,9 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, return BLK_STS_IOERR; } + if (zone->wp + nr_sectors > zone->start + zone->capacity) + return BLK_STS_IOERR; + if (zone->cond != BLK_ZONE_COND_EXP_OPEN) zone->cond = BLK_ZONE_COND_IMP_OPEN; @@ -193,7 +207,7 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, return ret; zone->wp += nr_sectors; - if (zone->wp == zone->start + zone->len) + if (zone->wp == zone->start + zone->capacity) zone->cond = BLK_ZONE_COND_FULL; return BLK_STS_OK; default: diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 10f6368117d8..01333af13bbb 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -562,13 +562,15 @@ static int rsxx_eeh_frozen(struct pci_dev *dev) for (i = 0; i < card->n_targets; i++) { if (card->ctrl[i].status.buf) - pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, - card->ctrl[i].status.buf, - card->ctrl[i].status.dma_addr); + dma_free_coherent(&card->dev->dev, + STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); if (card->ctrl[i].cmd.buf) - pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, - card->ctrl[i].cmd.buf, - card->ctrl[i].cmd.dma_addr); + dma_free_coherent(&card->dev->dev, + COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); } return 0; @@ -711,15 +713,15 @@ static pci_ers_result_t rsxx_slot_reset(struct pci_dev *dev) failed_hw_buffers_init: for (i = 0; i < card->n_targets; i++) { if (card->ctrl[i].status.buf) - pci_free_consistent(card->dev, - STATUS_BUFFER_SIZE8, - card->ctrl[i].status.buf, - card->ctrl[i].status.dma_addr); + dma_free_coherent(&card->dev->dev, + STATUS_BUFFER_SIZE8, + card->ctrl[i].status.buf, + card->ctrl[i].status.dma_addr); if (card->ctrl[i].cmd.buf) - pci_free_consistent(card->dev, - COMMAND_BUFFER_SIZE8, - card->ctrl[i].cmd.buf, - card->ctrl[i].cmd.dma_addr); + dma_free_coherent(&card->dev->dev, + COMMAND_BUFFER_SIZE8, + card->ctrl[i].cmd.buf, + card->ctrl[i].cmd.dma_addr); } failed_hw_setup: rsxx_eeh_failure(dev); diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index bf7dd96db9b3..d1ca4d059c20 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -27,7 +27,7 @@ config BCACHE_CLOSURES_DEBUG interface to list them, which makes it possible to see asynchronous operations that get stuck. -config BCACHE_ASYNC_REGISTRAION +config BCACHE_ASYNC_REGISTRATION bool "Asynchronous device registration (EXPERIMENTAL)" depends on BCACHE help diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile index fd714628da6a..5b87e59676b8 100644 --- a/drivers/md/bcache/Makefile +++ b/drivers/md/bcache/Makefile @@ -4,4 +4,4 @@ obj-$(CONFIG_BCACHE) += bcache.o bcache-y := alloc.o bset.o btree.o closure.o debug.o extents.o\ io.o journal.o movinggc.o request.o stats.o super.o sysfs.o trace.o\ - util.o writeback.o + util.o writeback.o features.o diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index a1df0d95151c..52035a78d836 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -87,7 +87,7 @@ void bch_rescale_priorities(struct cache_set *c, int sectors) { struct cache *ca; struct bucket *b; - unsigned int next = c->nbuckets * c->sb.bucket_size / 1024; + unsigned long next = c->nbuckets * c->sb.bucket_size / 1024; unsigned int i; int r; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 3c708e8b5e2d..4fd03d2496d8 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -264,7 +264,7 @@ struct bcache_device { #define BCACHE_DEV_UNLINK_DONE 2 #define BCACHE_DEV_WB_RUNNING 3 #define BCACHE_DEV_RATE_DW_RUNNING 4 - unsigned int nr_stripes; + int nr_stripes; unsigned int stripe_size; atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; @@ -762,11 +762,32 @@ struct bbio { #define bucket_bytes(c) ((c)->sb.bucket_size << 9) #define block_bytes(c) ((c)->sb.block_size << 9) -#define prios_per_bucket(c) \ - ((bucket_bytes(c) - sizeof(struct prio_set)) / \ +static inline unsigned int meta_bucket_pages(struct cache_sb *sb) +{ + unsigned int n, max_pages; + + max_pages = min_t(unsigned int, + __rounddown_pow_of_two(USHRT_MAX) / PAGE_SECTORS, + MAX_ORDER_NR_PAGES); + + n = sb->bucket_size / PAGE_SECTORS; + if (n > max_pages) + n = max_pages; + + return n; +} + +static inline unsigned int meta_bucket_bytes(struct cache_sb *sb) +{ + return meta_bucket_pages(sb) << PAGE_SHIFT; +} + +#define prios_per_bucket(ca) \ + ((meta_bucket_bytes(&(ca)->sb) - sizeof(struct prio_set)) / \ sizeof(struct bucket_disk)) -#define prio_buckets(c) \ - DIV_ROUND_UP((size_t) (c)->sb.nbuckets, prios_per_bucket(c)) + +#define prio_buckets(ca) \ + DIV_ROUND_UP((size_t) (ca)->sb.nbuckets, prios_per_bucket(ca)) static inline size_t sector_to_bucket(struct cache_set *c, sector_t s) { diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4995fcaefe29..67a2c47f4201 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -322,7 +322,7 @@ int bch_btree_keys_alloc(struct btree_keys *b, b->page_order = page_order; - t->data = (void *) __get_free_pages(gfp, b->page_order); + t->data = (void *) __get_free_pages(__GFP_COMP|gfp, b->page_order); if (!t->data) goto err; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index d5c51e332046..3d8bd0692af3 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -738,7 +738,7 @@ void bch_btree_cache_free(struct cache_set *c) if (c->verify_data) list_move(&c->verify_data->list, &c->btree_cache); - free_pages((unsigned long) c->verify_ondisk, ilog2(bucket_pages(c)));< |