diff options
62 files changed, 1395 insertions, 485 deletions
diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-client b/Documentation/ABI/testing/sysfs-class-rnbd-client index 00c0286733d4..2aa05b3e348e 100644 --- a/Documentation/ABI/testing/sysfs-class-rnbd-client +++ b/Documentation/ABI/testing/sysfs-class-rnbd-client @@ -66,7 +66,7 @@ Description: Expected format is the following:: The rnbd_server prepends the <device_path> received from client with <dev_search_path> and tries to open the <dev_search_path>/<device_path> block device. On success, - a /dev/rnbd<N> device file, a /sys/block/rnbd_client/rnbd<N>/ + a /dev/rnbd<N> device file, a /sys/block/rnbd<N>/ directory and an entry in /sys/class/rnbd-client/ctl/devices will be created. @@ -95,12 +95,12 @@ Description: Expected format is the following:: --------------------------------- After mapping, the device file can be found by: - o The symlink /sys/class/rnbd-client/ctl/devices/<device_id> + o The symlink /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name> points to /sys/block/<dev-name>. The last part of the symlink destination is the same as the device name. By extracting the last part of the path the path to the device /dev/<dev-name> can be build. - * /dev/block/$(cat /sys/class/rnbd-client/ctl/devices/<device_id>/dev) + * /dev/block/$(cat /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name>/dev) How to find the <device_id> of the device is described on the next section. @@ -110,7 +110,7 @@ Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com> Description: For each device mapped on the client a new symbolic link is created as - /sys/class/rnbd-client/ctl/devices/<device_id>, which points + /sys/class/rnbd-client/ctl/devices/<device_id>@<session_name>, which points to the block device created by rnbd (/sys/block/rnbd<N>/). The <device_id> of each device is created as follows: diff --git a/Documentation/ABI/testing/sysfs-class-rnbd-server b/Documentation/ABI/testing/sysfs-class-rnbd-server index ba60a90c0e45..6c5996cd7cfb 100644 --- a/Documentation/ABI/testing/sysfs-class-rnbd-server +++ b/Documentation/ABI/testing/sysfs-class-rnbd-server @@ -48,3 +48,11 @@ Date: Feb 2020 KernelVersion: 5.7 Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com> Description: Contains the device access mode: ro, rw or migration. + +What: /sys/class/rnbd-server/ctl/devices/<device_name>/sessions/<session-name>/force_close +Date: Nov 2020 +KernelVersion: 5.10 +Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com> +Description: Write "1" to the file to close the device on server side. Please + note that the client side device will not be closed, read or + write to the device will get -ENOTCONN. diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index e3e2ab0acf83..778247bb1d61 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -104,6 +104,8 @@ struct ccw_device { was successfully verified. */ #define PE_PATHGROUP_ESTABLISHED 0x4 /* A pathgroup was reset and had to be established again. */ +#define PE_PATH_FCES_EVENT 0x8 /* The FCES Status of a path has + * changed. */ /* * Possible CIO actions triggered by the unit check handler. diff --git a/arch/s390/include/asm/cio.h b/arch/s390/include/asm/cio.h index 23dceb8d0453..ac02df906cae 100644 --- a/arch/s390/include/asm/cio.h +++ b/arch/s390/include/asm/cio.h @@ -373,5 +373,6 @@ int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta); int chsc_sstpi(void *page, void *result, size_t size); int chsc_stzi(void *page, void *result, size_t size); int chsc_sgib(u32 origin); +int chsc_scud(u16 cu, u64 *esm, u8 *esm_valid); #endif diff --git a/block/blk-settings.c b/block/blk-settings.c index 659cdb8a07fe..43990b1d148b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -157,10 +157,16 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_secto __func__, max_hw_sectors); } + max_hw_sectors = round_down(max_hw_sectors, + limits->logical_block_size >> SECTOR_SHIFT); limits->max_hw_sectors = max_hw_sectors; + max_sectors = min_not_zero(max_hw_sectors, limits->max_dev_sectors); max_sectors = min_t(unsigned int, max_sectors, BLK_DEF_MAX_SECTORS); + max_sectors = round_down(max_sectors, + limits->logical_block_size >> SECTOR_SHIFT); limits->max_sectors = max_sectors; + q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -321,13 +327,20 @@ EXPORT_SYMBOL(blk_queue_max_segment_size); **/ void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) { - q->limits.logical_block_size = size; + struct queue_limits *limits = &q->limits; - if (q->limits.physical_block_size < size) - q->limits.physical_block_size = size; + limits->logical_block_size = size; - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; + if (limits->physical_block_size < size) + limits->physical_block_size = size; + + if (limits->io_min < limits->physical_block_size) + limits->io_min = limits->physical_block_size; + + limits->max_hw_sectors = + round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT); + limits->max_sectors = + round_down(limits->max_sectors, size >> SECTOR_SHIFT); } EXPORT_SYMBOL(blk_queue_logical_block_size); diff --git a/block/blk.h b/block/blk.h index d23d018fd2cd..7550364c326c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -90,18 +90,6 @@ static inline bool bvec_gap_to_prev(struct request_queue *q, return __bvec_gap_to_prev(q, bprv, offset); } -static inline void blk_rq_bio_prep(struct request *rq, struct bio *bio, - unsigned int nr_segs) -{ - rq->nr_phys_segments = nr_segs; - rq->__data_len = bio->bi_iter.bi_size; - rq->bio = rq->biotail = bio; - rq->ioprio = bio_prio(bio); - - if (bio->bi_disk) - rq->rq_disk = bio->bi_disk; -} - #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ecceaaa1a66f..262326973ee0 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -16,13 +16,7 @@ menuconfig BLK_DEV if BLK_DEV -config BLK_DEV_NULL_BLK - tristate "Null test block driver" - select CONFIGFS_FS - -config BLK_DEV_NULL_BLK_FAULT_INJECTION - bool "Support fault injection for Null test block driver" - depends on BLK_DEV_NULL_BLK && FAULT_INJECTION +source "drivers/block/null_blk/Kconfig" config BLK_DEV_FD tristate "Normal floppy disk support" diff --git a/drivers/block/Makefile b/drivers/block/Makefile index e1f63117ee94..a3170859e01d 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,12 +41,7 @@ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_BLK_DEV_RNBD) += rnbd/ -obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o -null_blk-objs := null_blk_main.o -ifeq ($(CONFIG_BLK_DEV_ZONED), y) -null_blk-$(CONFIG_TRACING) += null_blk_trace.o -endif -null_blk-$(CONFIG_BLK_DEV_ZONED) += null_blk_zoned.o +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk/ skd-y := skd_main.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/loop.c b/drivers/block/loop.c index d2ce1ddc192d..e5ff328f0917 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -2304,7 +2304,6 @@ MODULE_ALIAS("devname:loop-control"); static int __init loop_init(void) { int i, nr; - unsigned long range; struct loop_device *lo; int err; @@ -2341,13 +2340,10 @@ static int __init loop_init(void) * /dev/loop-control interface, or be instantiated by accessing * a 'dead' device node. */ - if (max_loop) { + if (max_loop) nr = max_loop; - range = max_loop << part_shift; - } else { + else nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; - range = 1UL << MINORBITS; - } err = misc_register(&loop_misc); if (err < 0) diff --git a/drivers/block/null_blk/Kconfig b/drivers/block/null_blk/Kconfig new file mode 100644 index 000000000000..6bf1f8ca20a2 --- /dev/null +++ b/drivers/block/null_blk/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Null block device driver configuration +# + +config BLK_DEV_NULL_BLK + tristate "Null test block driver" + select CONFIGFS_FS + +config BLK_DEV_NULL_BLK_FAULT_INJECTION + bool "Support fault injection for Null test block driver" + depends on BLK_DEV_NULL_BLK && FAULT_INJECTION diff --git a/drivers/block/null_blk/Makefile b/drivers/block/null_blk/Makefile new file mode 100644 index 000000000000..84c36e512ab8 --- /dev/null +++ b/drivers/block/null_blk/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +# needed for trace events +ccflags-y += -I$(src) + +obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o +null_blk-objs := main.o +ifeq ($(CONFIG_BLK_DEV_ZONED), y) +null_blk-$(CONFIG_TRACING) += trace.o +endif +null_blk-$(CONFIG_BLK_DEV_ZONED) += zoned.o diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk/main.c index 4685ea401d5b..5357c3a4a36f 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk/main.c @@ -152,6 +152,10 @@ static int g_bs = 512; module_param_named(bs, g_bs, int, 0444); MODULE_PARM_DESC(bs, "Block size (in bytes)"); +static int g_max_sectors; +module_param_named(max_sectors, g_max_sectors, int, 0444); +MODULE_PARM_DESC(max_sectors, "Maximum size of a command (in 512B sectors)"); + static unsigned int nr_devices = 1; module_param(nr_devices, uint, 0444); MODULE_PARM_DESC(nr_devices, "Number of devices to register"); @@ -346,6 +350,7 @@ NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); NULLB_DEVICE_ATTR(home_node, uint, NULL); NULLB_DEVICE_ATTR(queue_mode, uint, NULL); NULLB_DEVICE_ATTR(blocksize, uint, NULL); +NULLB_DEVICE_ATTR(max_sectors, uint, NULL); NULLB_DEVICE_ATTR(irqmode, uint, NULL); NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); NULLB_DEVICE_ATTR(index, uint, NULL); @@ -463,6 +468,7 @@ static struct configfs_attribute *nullb_device_attrs[] = { &nullb_device_attr_home_node, &nullb_device_attr_queue_mode, &nullb_device_attr_blocksize, + &nullb_device_attr_max_sectors, &nullb_device_attr_irqmode, &nullb_device_attr_hw_queue_depth, &nullb_device_attr_index, @@ -533,7 +539,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item) static ssize_t memb_group_features_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, - "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active\n"); + "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors\n"); } CONFIGFS_ATTR_RO(memb_group_, features); @@ -588,6 +594,7 @@ static struct nullb_device *null_alloc_dev(void) dev->home_node = g_home_node; dev->queue_mode = g_queue_mode; dev->blocksize = g_bs; + dev->max_sectors = g_max_sectors; dev->irqmode = g_irqmode; dev->hw_queue_depth = g_hw_queue_depth; dev->blocking = g_blocking; @@ -1076,13 +1083,16 @@ static void nullb_fill_pattern(struct nullb *nullb, struct page *page, kunmap_atomic(dst); } -static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) +blk_status_t null_handle_discard(struct nullb_device *dev, + sector_t sector, sector_t nr_sectors) { + struct nullb *nullb = dev->nullb; + size_t n = nr_sectors << SECTOR_SHIFT; size_t temp; spin_lock_irq(&nullb->lock); while (n > 0) { - temp = min_t(size_t, n, nullb->dev->blocksize); + temp = min_t(size_t, n, dev->blocksize); null_free_sector(nullb, sector, false); if (null_cache_active(nullb)) null_free_sector(nullb, sector, true); @@ -1090,6 +1100,8 @@ static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) n -= temp; } spin_unlock_irq(&nullb->lock); + + return BLK_STS_OK; } static int null_handle_flush(struct nullb *nullb) @@ -1149,17 +1161,10 @@ static int null_handle_rq(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = blk_rq_pos(rq); struct req_iterator iter; struct bio_vec bvec; - sector = blk_rq_pos(rq); - - if (req_op(rq) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, blk_rq_bytes(rq)); - return 0; - } - spin_lock_irq(&nullb->lock); rq_for_each_segment(bvec, rq, iter) { len = bvec.bv_len; @@ -1183,18 +1188,10 @@ static int null_handle_bio(struct nullb_cmd *cmd) struct nullb *nullb = cmd->nq->dev->nullb; int err; unsigned int len; - sector_t sector; + sector_t sector = bio->bi_iter.bi_sector; struct bio_vec bvec; struct bvec_iter iter; - sector = bio->bi_iter.bi_sector; - - if (bio_op(bio) == REQ_OP_DISCARD) { - null_handle_discard(nullb, sector, - bio_sectors(bio) << SECTOR_SHIFT); - return 0; - } - spin_lock_irq(&nullb->lock); bio_for_each_segment(bvec, bio, iter) { len = bvec.bv_len; @@ -1263,11 +1260,16 @@ static inline blk_status_t null_handle_badblocks(struct nullb_cmd *cmd, } static inline blk_status_t null_handle_memory_backed(struct nullb_cmd *cmd, - enum req_opf op) + enum req_opf op, + sector_t sector, + sector_t nr_sectors) { struct nullb_device *dev = cmd->nq->dev; int err; + if (op == REQ_OP_DISCARD) + return null_handle_discard(dev, sector, nr_sectors); + if (dev->queue_mode == NULL_Q_BIO) err = null_handle_bio(cmd); else @@ -1343,7 +1345,7 @@ blk_status_t null_process_cmd(struct nullb_cmd *cmd, } if (dev->memory_backed) - return null_handle_memory_backed(cmd, op); + return null_handle_memory_backed(cmd, op, sector, nr_sectors); return BLK_STS_OK; } @@ -1589,6 +1591,12 @@ static void null_config_discard(struct nullb *nullb) if (nullb->dev->discard == false) return; + if (!nullb->dev->memory_backed) { + nullb->dev->discard = false; + pr_info("discard option is ignored without memory backing\n"); + return; + } + if (nullb->dev->zoned) { nullb->dev->discard = false; pr_info("discard option is ignored in zoned mode\n"); @@ -1866,6 +1874,11 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_logical_block_size(nullb->q, dev->blocksize); blk_queue_physical_block_size(nullb->q, dev->blocksize); + if (!dev->max_sectors) + dev->max_sectors = queue_max_hw_sectors(nullb->q); + dev->max_sectors = min_t(unsigned int, dev->max_sectors, + BLK_DEF_MAX_SECTORS); + blk_queue_max_hw_sectors(nullb->q, dev->max_sectors); null_config_discard(nullb); @@ -1909,6 +1922,12 @@ static int __init null_init(void) g_bs = PAGE_SIZE; } + if (g_max_sectors > BLK_DEF_MAX_SECTORS) { + pr_warn("invalid max sectors\n"); + pr_warn("defaults max sectors to %u\n", BLK_DEF_MAX_SECTORS); + g_max_sectors = BLK_DEF_MAX_SECTORS; + } + if (g_home_node != NUMA_NO_NODE && g_home_node >= nr_online_nodes) { pr_err("invalid home_node value\n"); g_home_node = NUMA_NO_NODE; diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk/null_blk.h index c24d9b5ad81a..83504f3cc9d6 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk/null_blk.h @@ -12,6 +12,8 @@ #include <linux/configfs.h> #include <linux/badblocks.h> #include <linux/fault-inject.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> struct nullb_cmd { struct request *rq; @@ -32,6 +34,26 @@ struct nullb_queue { struct nullb_cmd *cmds; }; +struct nullb_zone { + /* + * Zone lock to prevent concurrent modification of a zone write + * pointer position and condition: with memory backing, a write + * command execution may sleep on memory allocation. For this case, + * use mutex as the zone lock. Otherwise, use the spinlock for + * locking the zone. + */ + union { + spinlock_t spinlock; + struct mutex mutex; + }; + enum blk_zone_type type; + enum blk_zone_cond cond; + sector_t start; + sector_t wp; + unsigned int len; + unsigned int capacity; +}; + struct nullb_device { struct nullb *nullb; struct config_item item; @@ -45,10 +67,11 @@ struct nullb_device { unsigned int nr_zones_imp_open; unsigned int nr_zones_exp_open; unsigned int nr_zones_closed; - struct blk_zone *zones; + unsigned int imp_close_zone_no; + struct nullb_zone *zones; sector_t zone_size_sects; - spinlock_t zone_lock; - unsigned long *zone_locks; + bool need_zone_res_mgmt; + spinlock_t zone_res_lock; unsigned long size; /* device size in MB */ unsigned long completion_nsec; /* time in ns to complete a request */ @@ -62,6 +85,7 @@ struct nullb_device { unsigned int home_node; /* home node for the device */ unsigned int queue_mode; /* block interface */ unsigned int blocksize; /* block size */ + unsigned int max_sectors; /* Max sectors per command */ unsigned int irqmode; /* IRQ completion handler */ unsigned int hw_queue_depth; /* queue depth */ unsigned int index; /* index of the disk, only valid with a disk */ @@ -93,6 +117,8 @@ struct nullb { char disk_name[DISK_NAME_LEN]; }; +blk_status_t null_handle_discard(struct nullb_device *dev, sector_t sector, + sector_t nr_sectors); blk_status_t null_process_cmd(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, unsigned int nr_sectors); diff --git a/drivers/block/null_blk_trace.c b/drivers/block/null_blk/trace.c index f246e7bff698..3711cba16071 100644 --- a/drivers/block/null_blk_trace.c +++ b/drivers/block/null_blk/trace.c @@ -4,7 +4,7 @@ * * Copyright (C) 2020 Western Digital Corporation or its affiliates. */ -#include "null_blk_trace.h" +#include "trace.h" /* * Helper to use for all null_blk traces to extract disk name. diff --git a/drivers/block/null_blk_trace.h b/drivers/block/null_blk/trace.h index 4f83032eb544..ce3b430e88c5 100644 --- a/drivers/block/null_blk_trace.h +++ b/drivers/block/null_blk/trace.h @@ -73,7 +73,7 @@ TRACE_EVENT(nullb_report_zones, #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE null_blk_trace +#define TRACE_INCLUDE_FILE trace /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk/zoned.c index beb34b4f76b0..148b871f263b 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -4,19 +4,58 @@ #include "null_blk.h" #define CREATE_TRACE_POINTS -#include "null_blk_trace.h" +#include "trace.h" -/* zone_size in MBs to sectors. */ -#define ZONE_SIZE_SHIFT 11 +#define MB_TO_SECTS(mb) (((sector_t)mb * SZ_1M) >> SECTOR_SHIFT) static inline unsigned int null_zone_no(struct nullb_device *dev, sector_t sect) { return sect >> ilog2(dev->zone_size_sects); } +static inline void null_lock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_lock_irq(&dev->zone_res_lock); +} + +static inline void null_unlock_zone_res(struct nullb_device *dev) +{ + if (dev->need_zone_res_mgmt) + spin_unlock_irq(&dev->zone_res_lock); +} + +static inline void null_init_zone_lock(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_init(&zone->spinlock); + else + mutex_init(&zone->mutex); +} + +static inline void null_lock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_lock_irq(&zone->spinlock); + else + mutex_lock(&zone->mutex); +} + +static inline void null_unlock_zone(struct nullb_device *dev, + struct nullb_zone *zone) +{ + if (!dev->memory_backed) + spin_unlock_irq(&zone->spinlock); + else + mutex_unlock(&zone->mutex); +} |