diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:37:03 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:37:03 -0700 |
commit | bce159d734091fe31340976081577333f52a85e4 (patch) | |
tree | 8396be51e6703797a60aefb4992e729f327d27c2 /drivers/nvme | |
parent | 750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff) | |
parent | 0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff) |
Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"On top of the core changes, here are the block driver changes for this
merge window:
- NVMe changes:
- NVMe over Fibre Channel protocol updates, which also reach
over to drivers/scsi/lpfc (James Smart)
- namespace revalidation support on the target (Anthony
Iliopoulos)
- gcc zero length array fix (Arnd Bergmann)
- nvmet cleanups (Chaitanya Kulkarni)
- misc cleanups and fixes (me, Keith Busch, Sagi Grimberg)
- use a SRQ per completion vector (Max Gurtovoy)
- fix handling of runtime changes to the queue count (Weiping
Zhang)
- t10 protection information support for nvme-rdma and
nvmet-rdma (Israel Rukshin and Max Gurtovoy)
- target side AEN improvements (Chaitanya Kulkarni)
- various fixes and minor improvements all over, including the
nvme part of the lpfc driver"
- Floppy code cleanup series (Willy, Denis)
- Floppy contention fix (Jiri)
- Loop CONFIGURE support (Martijn)
- bcache fixes/improvements (Coly, Joe, Colin)
- q->queuedata cleanups (Christoph)
- Get rid of ioctl_by_bdev (Christoph, Stefan)
- md/raid5 allocation fixes (Coly)
- zero length array fixes (Gustavo)
- swim3 task state fix (Xu)"
* tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits)
bcache: configure the asynchronous registertion to be experimental
bcache: asynchronous devices registration
bcache: fix refcount underflow in bcache_device_free()
bcache: Convert pr_<level> uses to a more typical style
bcache: remove redundant variables i and n
lpfc: Fix return value in __lpfc_nvme_ls_abort
lpfc: fix axchg pointer reference after free and double frees
lpfc: Fix pointer checks and comments in LS receive refactoring
nvme: set dma alignment to qword
nvmet: cleanups the loop in nvmet_async_events_process
nvmet: fix memory leak when removing namespaces and controllers concurrently
nvmet-rdma: add metadata/T10-PI support
nvmet: add metadata support for block devices
nvmet: add metadata/T10-PI support
nvme: add Metadata Capabilities enumerations
nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
nvmet: rename nvmet_rw_len to nvmet_rw_data_len
nvmet: add metadata characteristics for a namespace
nvme-rdma: add metadata/T10-PI support
nvme-rdma: introduce nvme_rdma_sgl structure
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/core.c | 322 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 577 | ||||
-rw-r--r-- | drivers/nvme/host/fc.h | 227 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 7 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 16 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 28 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 117 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 321 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 64 | ||||
-rw-r--r-- | drivers/nvme/target/Kconfig | 1 | ||||
-rw-r--r-- | drivers/nvme/target/admin-cmd.c | 42 | ||||
-rw-r--r-- | drivers/nvme/target/configfs.c | 272 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 166 | ||||
-rw-r--r-- | drivers/nvme/target/discovery.c | 8 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 15 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 805 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 155 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-bdev.c | 118 | ||||
-rw-r--r-- | drivers/nvme/target/io-cmd-file.c | 23 | ||||
-rw-r--r-- | drivers/nvme/target/nvmet.h | 36 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 416 | ||||
-rw-r--r-- | drivers/nvme/target/tcp.c | 53 | ||||
-rw-r--r-- | drivers/nvme/target/trace.h | 28 |
23 files changed, 2954 insertions, 863 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cbf171636766..0585efa47d8f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -19,7 +19,6 @@ #include <linux/pr.h> #include <linux/ptrace.h> #include <linux/nvme_ioctl.h> -#include <linux/t10-pi.h> #include <linux/pm_qos.h> #include <asm/unaligned.h> @@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) nvme_put_ctrl(ctrl); } -static inline bool nvme_ns_has_pi(struct nvme_ns *ns) -{ - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); -} - static blk_status_t nvme_error_status(u16 status) { switch (status & 0x7ff) { @@ -433,7 +427,6 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); - list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -530,7 +523,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); + c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s))); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; @@ -553,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) - return ret; + goto out_disable_stream; ctrl->nssa = le16_to_cpu(s.nssa); if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", ctrl->nssa); - nvme_disable_streams(ctrl); - return 0; + goto out_disable_stream; } ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; + +out_disable_stream: + nvme_disable_streams(ctrl); + return ret; } /* @@ -1027,6 +1023,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) } 
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); +/* + * In NVMe 1.0 the CNS field was just a binary controller or namespace + * flag, thus sending any new CNS opcodes has a big chance of not working. + * Qemu unfortunately had that bug after reporting a 1.1 version compliance + * (but not for any later version). + */ +static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +{ + if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) + return ctrl->vs < NVME_VS(1, 2, 0); + return ctrl->vs < NVME_VS(1, 1, 0); +} + static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) { struct nvme_command c = { }; @@ -1290,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; metadata = nvme_to_user_ptr(io.metadata); - if (ns->ext) { + if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; meta_len = 0; } else if (meta_len) { @@ -1392,8 +1401,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); + flush_work(&ctrl->scan_work); + } } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, @@ -1682,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { struct blk_integrity integrity; @@ -1705,20 +1717,15 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } integrity.tuple_size = ms; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, 1); + blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void 
nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1804,12 +1811,37 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u32 *phys_bs, u32 *io_opt) +{ + struct streams_directive_params s; + int ret; + + if (!ctrl->nr_streams) + return 0; + + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); + if (ret) + return ret; + + ns->sws = le32_to_cpu(s.sws); + ns->sgs = le16_to_cpu(s.sgs); + + if (ns->sws) { + *phys_bs = ns->sws * (1 << ns->lba_shift); + if (ns->sgs) + *io_opt = *phys_bs * ns->sgs; + } + + return 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; - u32 atomic_bs, phys_bs, io_opt; + u32 atomic_bs, phys_bs, io_opt = 0; if (ns->lba_shift > PAGE_SHIFT) { /* unsupported block size, set capacity to 0 later */ @@ -1818,26 +1850,25 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); + atomic_bs = phys_bs = bs; + nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace * and whether it should be used instead of AWUPF. If NAWUPF == * 0 then AWUPF must be used instead. 
*/ - if (id->nsfeat & (1 << 1) && id->nawupf) + if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; - } else { - atomic_bs = bs; } - phys_bs = bs; - io_opt = bs; - if (id->nsfeat & (1 << 4)) { + + if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ - phys_bs *= 1 + le16_to_cpu(id->npwg); + phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ - io_opt *= 1 + le16_to_cpu(id->nows); + io_opt = bs * (1 + le16_to_cpu(id->nows)); } blk_queue_logical_block_size(disk->queue, bs); @@ -1850,19 +1881,34 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !ns->ext && - (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); - if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || - ns->lba_shift > PAGE_SHIFT) + /* + * The block layer can't support LBA sizes larger than the page size + * yet, so catch this early and don't allow block I/O. + */ + if (ns->lba_shift > PAGE_SHIFT) capacity = 0; + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. 
+ */ + if (ns->ms) { + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + (ns->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, ns->ms, ns->pi_type, + ns->ctrl->max_integrity_segments); + else if (!nvme_ns_has_pi(ns)) + capacity = 0; + } + set_capacity_revalidate_and_notify(disk, capacity, false); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if (id->nsattr & (1 << 0)) + if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); else set_disk_ro(disk, false); @@ -1870,9 +1916,11 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_unfreeze_queue(disk->queue); } -static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; + u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -1881,32 +1929,55 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) + iob = ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + + ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else ns->pi_type = 0; - if (ns->noiob) - nvme_set_chunk_size(ns); + if (ns->ms) { + /* + * For PCIe only the separate metadata pointer is supported, + * as the block layer supplies metadata in a separate bio_vec + * chain. 
For Fabrics, only metadata as part of extended data + * LBA is supported on the wire per the Fabrics specification, + * but the HBA/HCA will do the remapping from the separate + * metadata buffers for us. + */ + if (id->flbas & NVME_NS_FLBAS_META_EXT) { + ns->features |= NVME_NS_EXT_LBAS; + if ((ctrl->ops->flags & NVME_F_FABRICS) && + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && + ctrl->max_integrity_segments) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } else { + if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) + return -EINVAL; + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } + } + + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct backing_dev_info *info = - ns->head->disk->queue->backing_dev_info; - - info->capabilities |= BDI_CAP_STABLE_WRITES; - } - revalidate_disk(ns->head->disk); } #endif + return 0; } static int nvme_revalidate_disk(struct gendisk *disk) @@ -1931,7 +2002,6 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); if (ret) goto free_id; @@ -1940,8 +2010,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; + goto free_id; } + ret = __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: @@ -2249,10 +2321,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - 
blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); + blk_queue_dma_alignment(q, 7); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; blk_queue_write_cache(q, vwc, vwc); @@ -2655,7 +2725,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, return false; } - if ((id->cmic & (1 << 1)) || + if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || (ctrl->opts && ctrl->opts->discovery_nqn)) continue; @@ -2746,7 +2816,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset) { struct nvme_command c = { }; - unsigned long dwlen = size / 4 - 1; + u32 dwlen = nvme_bytes_to_numd(size); c.get_log_page.opcode = nvme_admin_get_log_page; c.get_log_page.nsid = cpu_to_le32(nsid); @@ -3401,7 +3471,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && - !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -3410,8 +3479,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id, - struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3469,42 +3537,51 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; - bool is_shared = id->nmic & (1 << 0); + bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; struct nvme_ns_head *head = NULL; struct nvme_ns_ids ids; int ret = 0; ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out; + if (ret) { + if (ret < 0) + return ret; + return blk_status_to_errno(nvme_error_status(ret)); + } mutex_lock(&ctrl->subsys->lock); - if (is_shared) - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - 
head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); + head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } + head->shared = is_shared; } else { + ret = -EINVAL; + if (!is_shared || !head->shared) { + dev_err(ctrl->device, + "Duplicate unshared namespace %d\n", nsid); + goto out_put_ns_head; + } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", nsid); - ret = -EINVAL; - goto out_unlock; + goto out_put_ns_head; } } list_add_tail(&ns->siblings, &head->list); ns->head = head; + mutex_unlock(&ctrl->subsys->lock); + return 0; +out_put_ns_head: + nvme_put_ns_head(head); out_unlock: mutex_unlock(&ctrl->subsys->lock); -out: - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -3535,32 +3612,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) return ret; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) -{ - struct streams_directive_params s; - int ret; - - if (!ctrl->nr_streams) - return 0; - - ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); - if (ret) - return ret; - - ns->sws = le32_to_cpu(s.sws); - ns->sgs = le16_to_cpu(s.sgs); - - if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); - if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); - } - - return 0; -} - static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3604,7 +3655,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ret = nvme_init_ns_head(ns, nsid, id); if (ret) goto out_free_id; - nvme_setup_streams_ns(ctrl, ns); nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); @@ -3618,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - __nvme_revalidate_disk(disk, id); + 
if (__nvme_revalidate_disk(disk, id)) + goto out_free_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3645,9 +3696,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); + out_free_disk: + del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); out_free_id: @@ -3667,7 +3722,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); + synchronize_rcu(); /* guarantee not available in head->list */ nvme_mpath_clear_current_path(ns); synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ @@ -3687,6 +3745,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_put_ns(ns); } +static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) +{ + struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); + + if (ns) { + nvme_ns_remove(ns); + nvme_put_ns(ns); + } +} + static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3718,39 +3786,34 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, } -static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; + const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32); __le32 *ns_list; - unsigned i, j, nsid, prev = 0; - unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); - int ret = 0; + u32 prev = 0; + int ret = 0, i; + + if (nvme_ctrl_limited_cns(ctrl)) + return -EOPNOTSUPP; ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; - for 
(i = 0; i < num_lists; i++) { + for (;;) { ret = nvme_identify_ns_list(ctrl, prev, ns_list); if (ret) goto free; - for (j = 0; j < min(nn, 1024U); j++) { - nsid = le32_to_cpu(ns_list[j]); - if (!nsid) - goto out; + for (i = 0; i < nr_entries; i++) { + u32 nsid = le32_to_cpu(ns_list[i]); + if (!nsid) /* end of the list? */ + goto out; nvme_validate_ns(ctrl, nsid); - - while (++prev < nsid) { - ns = nvme_find_get_ns(ctrl, prev); - if (ns) { - nvme_ns_remove(ns); - nvme_put_ns(ns); - } - } + while (++prev < nsid) + nvme_ns_remove_by_nsid(ctrl, prev); } - nn -= j; } out: nvme_remove_invalid_namespaces(ctrl, prev); @@ -3759,9 +3822,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) return ret; } -static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) +static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) { - unsigned i; + struct nvme_id_ctrl *id; + u32 nn, i; + + if (nvme_identify_ctrl(ctrl, &id)) + return; + nn = le32_to_cpu(id->nn); + kfree(id); for (i = 1; i <= nn; i++) nvme_validate_ns(ctrl, i); @@ -3798,8 +3867,6 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); - struct nvme_id_ctrl *id; - unsigned nn; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) @@ -3810,20 +3877,11 @@ static void nvme_scan_work(struct work_struct *work) nvme_clear_changed_ns_log(ctrl); } - if (nvme_identify_ctrl(ctrl, &id)) - return; - mutex_lock(&ctrl->scan_lock); - nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { - if (!nvme_scan_ns_list(ctrl, nn)) - goto out_free_id; - } - nvme_scan_ns_sequential(ctrl, nn); -out_free_id: + if (nvme_scan_ns_list(ctrl) != 0) + nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - kfree(id); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); 
up_write(&ctrl->namespaces_rwsem); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7dfc4a2ecf1e..cb0007592c12 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -14,6 +14,7 @@ #include "fabrics.h" #include <linux/nvme-fc-driver.h> #include <linux/nvme-fc.h> +#include "fc.h" #include <scsi/scsi_transport_fc.h> /* *************************** Data Structures/Defines ****************** */ @@ -61,6 +62,17 @@ struct nvmefc_ls_req_op { bool req_queued; }; +struct nvmefc_ls_rcv_op { + struct nvme_fc_rport *rport; + struct nvmefc_ls_rsp *lsrsp; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; + u16 rqstdatalen; + bool handled; + dma_addr_t rspdma; + struct list_head lsrcv_list; /* rport->ls_rcv_list */ +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + enum nvme_fcpop_state { FCPOP_STATE_UNINIT = 0, FCPOP_STATE_IDLE = 1, @@ -96,7 +108,7 @@ struct nvme_fc_fcp_op { struct nvme_fcp_op_w_sgl { struct nvme_fc_fcp_op op; struct scatterlist sgl[NVME_INLINE_SG_CNT]; - uint8_t priv[0]; + uint8_t priv[]; }; struct nvme_fc_lport { @@ -117,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head ls_rcv_list; struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; @@ -124,11 +137,12 @@ struct nvme_fc_rport { struct kref ref; atomic_t act_ctrl_cnt; unsigned long dev_loss_end; + struct work_struct lsrcv_work; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_flags { - FCCTRL_TERMIO = (1 << 0), -}; +/* fc_ctrl flags values - specified as bit positions */ +#define ASSOC_ACTIVE 0 +#define FCCTRL_TERMIO 1 struct nvme_fc_ctrl { spinlock_t lock; @@ -139,9 +153,9 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - bool assoc_active; atomic_t err_work_active; u64 association_id; + struct nvmefc_ls_rcv_op 
*rcv_disconn; struct list_head ctrl_list; /* rport->ctrl_list */ @@ -152,7 +166,7 @@ struct nvme_fc_ctrl { struct work_struct err_work; struct kref ref; - u32 flags; + unsigned long flags; u32 iocnt; wait_queue_head_t ioabort_wait; @@ -219,6 +233,9 @@ static struct device *fc_udev_device; static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); + + static void nvme_fc_free_lport(struct kref *ref) { @@ -394,7 +411,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, newrec->ops = template; newrec->dev = dev; ida_init(&newrec->endp_cnt); - newrec->localport.private = &newrec[1]; + if (template->local_priv_sz) + newrec->localport.private = &newrec[1]; + else + newrec->localport.private = NULL; newrec->localport.node_name = pinfo->node_name; newrec->localport.port_name = pinfo->port_name; newrec->localport.port_role = pinfo->port_role; @@ -701,9 +721,13 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + INIT_LIST_HEAD(&newrec->ls_rcv_list); newrec->dev = lport->dev; newrec->lport = lport; - newrec->remoteport.private = &newrec[1]; + if (lport->ops->remote_priv_sz) + newrec->remoteport.private = &newrec[1]; + else + newrec->remoteport.private = NULL; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; newrec->remoteport.port_name = pinfo->port_name; @@ -711,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -1000,6 +1025,7 @@ fc_dma_unmap_sg(struct device 
*dev, struct scatterlist *sg, int nents, static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) @@ -1140,41 +1166,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, return __nvme_fc_send_ls_req(rport, lsop, done); } -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_LSACC = 1, - VERR_LSDESC_RQST = 2, - VERR_LSDESC_RQST_LEN = 3, - VERR_ASSOC_ID = 4, - VERR_ASSOC_ID_LEN = 5, - VERR_CONN_ID = 6, - VERR_CONN_ID_LEN = 7, - VERR_CR_ASSOC = 8, - VERR_CR_ASSOC_ACC_LEN = 9, - VERR_CR_CONN = 10, - VERR_CR_CONN_ACC_LEN = 11, - VERR_DISCONN = 12, - VERR_DISCONN_ACC_LEN = 13, -}; - -static char *validation_errors[] = { - "OK", - "Not LS_ACC", - "Not LSDESC_RQST", - "Bad LSDESC_RQST Length", - "Not Association ID", - "Bad Association ID Length", - "Not Connection ID", - "Bad Connection ID Length", - "Not CR_ASSOC Rqst", - "Bad CR_ASSOC ACC Length", - "Not CR_CONN Rqst", - "Bad CR_CONN ACC Length", - "Not Disconnect Rqst", - "Bad Disconnect ACC Length", -}; - static int nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) @@ -1183,21 +1174,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req *lsreq; struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; struct fcnvme_ls_cr_assoc_acc *assoc_acc; + unsigned long flags; int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Association failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)& |