diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 11:51:49 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 11:51:49 -0800 |
commit | 0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch) | |
tree | cefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/nvme | |
parent | 9697e9da84299d0d715d515dd2cc48f1eceb277d (diff) | |
parent | 796baeeef85a40b3495a907fb7425086e7010102 (diff) |
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block IO related changes for the
4.16 kernel. Nothing major in this pull request, but a good amount of
improvements and fixes all over the map. This contains:
- BFQ improvements, fixes, and cleanups from Angelo, Chiara, and
Paolo.
- Support for SMR zones for deadline and mq-deadline from Damien and
Christoph.
- Set of fixes for bcache by way of Michael Lyle, including fixes
from himself, Kent, Rui, Tang, and Coly.
- Series from Matias for lightnvm with fixes from Hans Holmberg,
Javier, and Matias. Mostly centered around pblk, and the removing
rrpc 1.2 in preparation for supporting 2.0.
- A couple of NVMe pull requests from Christoph. Nothing major in
here, just fixes and cleanups, and support for command tracing from
Johannes.
- Support for blk-throttle for tracking reads and writes separately.
From Joseph Qi. A few cleanups/fixes also for blk-throttle from
Weiping.
- Series from Mike Snitzer that enables dm to register its queue more
logically, something that's alwways been problematic on dm since
it's a stacked device.
- Series from Ming cleaning up some of the bio accessor use, in
preparation for supporting multipage bvecs.
- Various fixes from Ming closing up holes around queue mapping and
quiescing.
- BSD partition fix from Richard Narron, fixing a problem where we
can't mount newer (10/11) FreeBSD partitions.
- Series from Tejun reworking blk-mq timeout handling. The previous
scheme relied on atomic bits, but it had races where we would think
a request had timed out if it to reused at the wrong time.
- null_blk now supports faking timeouts, to enable us to better
exercise and test that functionality separately. From me.
- Kill the separate atomic poll bit in the request struct. After
this, we don't use the atomic bits on blk-mq anymore at all. From
me.
- sgl_alloc/free helpers from Bart.
- Heavily contended tag case scalability improvement from me.
- Various little fixes and cleanups from Arnd, Bart, Corentin,
Douglas, Eryu, Goldwyn, and myself"
* 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits)
block: remove smart1,2.h
nvme: add tracepoint for nvme_complete_rq
nvme: add tracepoint for nvme_setup_cmd
nvme-pci: introduce RECONNECTING state to mark initializing procedure
nvme-rdma: remove redundant boolean for inline_data
nvme: don't free uuid pointer before printing it
nvme-pci: Suspend queues after deleting them
bsg: use pr_debug instead of hand crafted macros
blk-mq-debugfs: don't allow write on attributes with seq_operations set
nvme-pci: Fix queue double allocations
block: Set BIO_TRACE_COMPLETION on new bio during split
blk-throttle: use queue_is_rq_based
block: Remove kblockd_schedule_delayed_work{,_on}()
blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays
blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly()
lib/scatterlist: Fix chaining support in sgl_alloc_order()
blk-throttle: track read and write request individually
block: add bdev_read_only() checks to common helpers
block: fail op_is_write() requests to read-only partitions
blk-throttle: export io_serviced_recursive, io_service_bytes_recursive
...
Diffstat (limited to 'drivers/nvme')
-rw-r--r-- | drivers/nvme/host/Makefile | 4 | ||||
-rw-r--r-- | drivers/nvme/host/core.c | 134 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.c | 22 | ||||
-rw-r--r-- | drivers/nvme/host/fabrics.h | 2 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 7 | ||||
-rw-r--r-- | drivers/nvme/host/lightnvm.c | 185 | ||||
-rw-r--r-- | drivers/nvme/host/multipath.c | 44 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 9 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 216 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 6 | ||||
-rw-r--r-- | drivers/nvme/host/trace.c | 130 | ||||
-rw-r--r-- | drivers/nvme/host/trace.h | 165 | ||||
-rw-r--r-- | drivers/nvme/target/Kconfig | 2 | ||||
-rw-r--r-- | drivers/nvme/target/core.c | 14 | ||||
-rw-r--r-- | drivers/nvme/target/fabrics-cmd.c | 2 | ||||
-rw-r--r-- | drivers/nvme/target/fc.c | 60 | ||||
-rw-r--r-- | drivers/nvme/target/fcloop.c | 244 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 3 | ||||
-rw-r--r-- | drivers/nvme/target/rdma.c | 83 |
19 files changed, 833 insertions, 499 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile index a25fd43650ad..441e67e3a9d7 100644 --- a/drivers/nvme/host/Makefile +++ b/drivers/nvme/host/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 + +ccflags-y += -I$(src) + obj-$(CONFIG_NVME_CORE) += nvme-core.o obj-$(CONFIG_BLK_DEV_NVME) += nvme.o obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o @@ -6,6 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o obj-$(CONFIG_NVME_FC) += nvme-fc.o nvme-core-y := core.o +nvme-core-$(CONFIG_TRACING) += trace.o nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o nvme-core-$(CONFIG_NVM) += lightnvm.o diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 839650e0926a..e8104871cbbf 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -29,6 +29,9 @@ #include <linux/pm_qos.h> #include <asm/unaligned.h> +#define CREATE_TRACE_POINTS +#include "trace.h" + #include "nvme.h" #include "fabrics.h" @@ -65,9 +68,26 @@ static bool streams; module_param(streams, bool, 0644); MODULE_PARM_DESC(streams, "turn on support for Streams write directives"); +/* + * nvme_wq - hosts nvme related works that are not reset or delete + * nvme_reset_wq - hosts nvme reset works + * nvme_delete_wq - hosts nvme delete works + * + * nvme_wq will host works such are scan, aen handling, fw activation, + * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq + * runs reset works which also flush works hosted on nvme_wq for + * serialization purposes. nvme_delete_wq host controller deletion + * works which flush reset works for serialization. + */ struct workqueue_struct *nvme_wq; EXPORT_SYMBOL_GPL(nvme_wq); +struct workqueue_struct *nvme_reset_wq; +EXPORT_SYMBOL_GPL(nvme_reset_wq); + +struct workqueue_struct *nvme_delete_wq; +EXPORT_SYMBOL_GPL(nvme_delete_wq); + static DEFINE_IDA(nvme_subsystems_ida); static LIST_HEAD(nvme_subsystems); static DEFINE_MUTEX(nvme_subsystems_lock); @@ -89,13 +109,13 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->reset_work)) + if (!queue_work(nvme_reset_wq, &ctrl->reset_work)) return -EBUSY; return 0; } EXPORT_SYMBOL_GPL(nvme_reset_ctrl); -static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) { int ret; @@ -104,6 +124,7 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) flush_work(&ctrl->reset_work); return ret; } +EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync); static void nvme_delete_ctrl_work(struct work_struct *work) { @@ -122,7 +143,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) return -EBUSY; - if (!queue_work(nvme_wq, &ctrl->delete_work)) + if (!queue_work(nvme_delete_wq, &ctrl->delete_work)) return -EBUSY; return 0; } @@ -157,13 +178,20 @@ static blk_status_t nvme_error_status(struct request *req) return BLK_STS_OK; case NVME_SC_CAP_EXCEEDED: return BLK_STS_NOSPC; + case NVME_SC_LBA_RANGE: + return BLK_STS_TARGET; + case NVME_SC_BAD_ATTRIBUTES: case NVME_SC_ONCS_NOT_SUPPORTED: + case NVME_SC_INVALID_OPCODE: + case NVME_SC_INVALID_FIELD: + case NVME_SC_INVALID_NS: return BLK_STS_NOTSUPP; case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: case NVME_SC_ACCESS_DENIED: case NVME_SC_READ_ONLY: + case NVME_SC_COMPARE_FAILED: return BLK_STS_MEDIUM; case NVME_SC_GUARD_CHECK: case NVME_SC_APPTAG_CHECK: @@ -190,8 +218,12 @@ static inline bool nvme_req_needs_retry(struct request *req) void nvme_complete_rq(struct request *req) { - if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) { - if (nvme_req_needs_failover(req)) { + blk_status_t status = nvme_error_status(req); + + trace_nvme_complete_rq(req); + + if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) { + if (nvme_req_needs_failover(req, status)) { nvme_failover_req(req); return; } @@ -202,8 +234,7 @@ void nvme_complete_rq(struct request *req) return; } } - - blk_mq_end_request(req, nvme_error_status(req)); + blk_mq_end_request(req, status); } EXPORT_SYMBOL_GPL(nvme_complete_rq); @@ -232,6 +263,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, old_state = ctrl->state; switch (new_state) { + case NVME_CTRL_ADMIN_ONLY: + switch (old_state) { + case NVME_CTRL_RECONNECTING: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; case NVME_CTRL_LIVE: switch (old_state) { case NVME_CTRL_NEW: @@ -247,6 +287,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, switch (old_state) { case NVME_CTRL_NEW: case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: changed = true; /* FALLTHRU */ default: @@ -266,6 +307,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, case NVME_CTRL_DELETING: switch (old_state) { case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: case NVME_CTRL_RESETTING: case NVME_CTRL_RECONNECTING: changed = true; @@ -591,6 +633,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, } cmd->common.command_id = req->tag; + if (ns) + trace_nvme_setup_nvm_cmd(req->q->id, cmd); + else + trace_nvme_setup_admin_cmd(cmd); return ret; } EXPORT_SYMBOL_GPL(nvme_setup_cmd); @@ -1217,16 +1263,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode) #ifdef CONFIG_NVME_MULTIPATH /* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(ns->head->disk)) - return -ENXIO; + goto fail; #endif if (!kref_get_unless_zero(&ns->kref)) - return -ENXIO; + goto fail; + if (!try_module_get(ns->ctrl->ops->module)) + goto fail_put_ns; + return 0; + +fail_put_ns: + nvme_put_ns(ns); +fail: + return -ENXIO; } static void nvme_release(struct gendisk *disk, fmode_t mode) { - nvme_put_ns(disk->private_data); + struct nvme_ns *ns = disk->private_data; + + module_put(ns->ctrl->ops->module); + nvme_put_ns(ns); } static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) @@ -2052,6 +2109,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; +static int nvme_active_ctrls(struct nvme_subsystem *subsys) +{ + int count = 0; + struct nvme_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (ctrl->state != NVME_CTRL_DELETING && + ctrl->state != NVME_CTRL_DEAD) + count++; + } + mutex_unlock(&subsys->lock); + + return count; +} + static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { struct nvme_subsystem *subsys, *found; @@ -2090,7 +2163,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) * Verify that the subsystem actually supports multiple * controllers, else bail out. */ - if (!(id->cmic & (1 << 1))) { + if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", found->subnqn); @@ -2257,7 +2330,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) shutdown_timeout, 60); if (ctrl->shutdown_timeout != shutdown_timeout) - dev_warn(ctrl->device, + dev_info(ctrl->device, "Shutdown timeout set to %u seconds\n", ctrl->shutdown_timeout); } else @@ -2341,8 +2414,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file) struct nvme_ctrl *ctrl = container_of(inode->i_cdev, struct nvme_ctrl, cdev); - if (ctrl->state != NVME_CTRL_LIVE) + switch (ctrl->state) { + case NVME_CTRL_LIVE: + case NVME_CTRL_ADMIN_ONLY: + break; + default: return -EWOULDBLOCK; + } + file->private_data = ctrl; return 0; } @@ -2606,6 +2685,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, static const char *const state_name[] = { [NVME_CTRL_NEW] = "new", [NVME_CTRL_LIVE] = "live", + [NVME_CTRL_ADMIN_ONLY] = "only-admin", [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_RECONNECTING]= "reconnecting", [NVME_CTRL_DELETING] = "deleting", @@ -3079,6 +3159,8 @@ static void nvme_scan_work(struct work_struct *work) if (ctrl->state != NVME_CTRL_LIVE) return; + WARN_ON_ONCE(!ctrl->tagset); + if (nvme_identify_ctrl(ctrl, &id)) return; @@ -3099,8 +3181,7 @@ static void nvme_scan_work(struct work_struct *work) void nvme_queue_scan(struct nvme_ctrl *ctrl) { /* - * Do not queue new scan work when a controller is reset during - * removal. + * Only new queue scan work when admin and IO queues are both alive */ if (ctrl->state == NVME_CTRL_LIVE) queue_work(nvme_wq, &ctrl->scan_work); @@ -3477,16 +3558,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset); int __init nvme_core_init(void) { - int result; + int result = -ENOMEM; nvme_wq = alloc_workqueue("nvme-wq", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!nvme_wq) - return -ENOMEM; + goto out; + + nvme_reset_wq = alloc_workqueue("nvme-reset-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_reset_wq) + goto destroy_wq; + + nvme_delete_wq = alloc_workqueue("nvme-delete-wq", + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); + if (!nvme_delete_wq) + goto destroy_reset_wq; result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme"); if (result < 0) - goto destroy_wq; + goto destroy_delete_wq; nvme_class = class_create(THIS_MODULE, "nvme"); if (IS_ERR(nvme_class)) { @@ -3505,8 +3596,13 @@ destroy_class: class_destroy(nvme_class); unregister_chrdev: unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); +destroy_delete_wq: + destroy_workqueue(nvme_delete_wq); +destroy_reset_wq: + destroy_workqueue(nvme_reset_wq); destroy_wq: destroy_workqueue(nvme_wq); +out: return result; } @@ -3516,6 +3612,8 @@ void nvme_core_exit(void) class_destroy(nvme_subsys_class); class_destroy(nvme_class); unregister_chrdev_region(nvme_chr_devt, NVME_MINORS); + destroy_workqueue(nvme_delete_wq); + destroy_workqueue(nvme_reset_wq); destroy_workqueue(nvme_wq); } diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 894c2ccb3891..5dd4ceefed8f 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect); */ int nvmf_register_transport(struct nvmf_transport_ops *ops) { - if (!ops->create_ctrl) + if (!ops->create_ctrl || !ops->module) return -EINVAL; down_write(&nvmf_transports_rwsem); @@ -739,11 +739,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -ENOMEM; goto out; } - if (uuid_parse(p, &hostid)) { + ret = uuid_parse(p, &hostid); + if (ret) { pr_err("Invalid hostid %s\n", p); ret = -EINVAL; + kfree(p); goto out; } + kfree(p); break; case NVMF_OPT_DUP_CONNECT: opts->duplicate_connect = true; @@ -869,32 +872,41 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_unlock; } + if (!try_module_get(ops->module)) { + ret = -EBUSY; + goto out_unlock; + } + ret = nvmf_check_required_opts(opts, ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS | ops->allowed_opts | ops->required_opts); if (ret) - goto out_unlock; + goto out_module_put; ctrl = ops->create_ctrl(dev, opts); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); - goto out_unlock; + goto out_module_put; } if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) { dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subsys->subnqn); + module_put(ops->module); up_read(&nvmf_transports_rwsem); nvme_delete_ctrl_sync(ctrl); return ERR_PTR(-EINVAL); } + module_put(ops->module); up_read(&nvmf_transports_rwsem); return ctrl; +out_module_put: + module_put(ops->module); out_unlock: up_read(&nvmf_transports_rwsem); out_free_opts: diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 9ba614953607..25b19f722f5b 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -108,6 +108,7 @@ struct nvmf_ctrl_options { * fabric implementation of NVMe fabrics. * @entry: Used by the fabrics library to add the new * registration entry to its linked-list internal tree. + * @module: Transport module reference * @name: Name of the NVMe fabric driver implementation. * @required_opts: sysfs command-line options that must be specified * when adding a new NVMe controller. @@ -126,6 +127,7 @@ struct nvmf_ctrl_options { */ struct nvmf_transport_ops { struct list_head entry; + struct module *module; const char *name; int required_opts; int allowed_opts; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 794e66e4aa20..99bf51c7e513 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2921,6 +2921,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); nvme_fc_free_queue(&ctrl->queues[0]); + /* re-enable the admin_q so anything new can fast fail */ + blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); + nvme_fc_ctlr_inactive_on_rport(ctrl); } @@ -2935,6 +2938,9 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) * waiting for io to terminate */ nvme_fc_delete_association(ctrl); + + /* resume the io queues so that things will fast fail */ + nvme_start_queues(nctrl); } static void @@ -3380,6 +3386,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) static struct nvmf_transport_ops nvme_fc_transport = { .name = "fc", + .module = THIS_MODULE, .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, .create_ctrl = nvme_fc_create_ctrl, diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index ba3d7f3349e5..50ef71ee3d86 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -31,27 +31,10 @@ enum nvme_nvm_admin_opcode { nvme_nvm_admin_identity = 0xe2, - nvme_nvm_admin_get_l2p_tbl = 0xea, nvme_nvm_admin_get_bb_tbl = 0xf2, nvme_nvm_admin_set_bb_tbl = 0xf1, }; -struct nvme_nvm_hb_rw { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __u64 rsvd2; - __le64 metadata; - __le64 prp1; - __le64 prp2; - __le64 spba; - __le16 length; - __le16 control; - __le32 dsmgmt; - __le64 slba; -}; - struct nvme_nvm_ph_rw { __u8 opcode; __u8 flags; @@ -80,19 +63,6 @@ struct nvme_nvm_identity { __u32 rsvd11[5]; }; -struct nvme_nvm_l2ptbl { - __u8 opcode; - __u8 flags; - __u16 command_id; - __le32 nsid; - __le32 cdw2[4]; - __le64 prp1; - __le64 prp2; - __le64 slba; - __le32 nlb; - __le16 cdw14[6]; -}; - struct nvme_nvm_getbbtbl { __u8 opcode; __u8 flags; @@ -139,9 +109,7 @@ struct nvme_nvm_command { union { struct nvme_common_command common; struct nvme_nvm_identity identity; - struct nvme_nvm_hb_rw hb_rw; struct nvme_nvm_ph_rw ph_rw; - struct nvme_nvm_l2ptbl l2p; struct nvme_nvm_getbbtbl get_bb; struct nvme_nvm_setbbtbl set_bb; struct nvme_nvm_erase_blk erase; @@ -167,7 +135,7 @@ struct nvme_nvm_id_group { __u8 num_lun; __u8 num_pln; __u8 rsvd1; - __le16 num_blk; + __le16 num_chk; __le16 num_pg; __le16 fpg_sz; __le16 csecs; @@ -234,11 +202,9 @@ struct nvme_nvm_bb_tbl { static inline void _nvme_nvm_check_size(void) { BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64); - BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64); BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960); BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16); @@ -249,51 +215,58 @@ static inline void _nvme_nvm_check_size(void) static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id) { struct nvme_nvm_id_group *src; - struct nvm_id_group *dst; + struct nvm_id_group *grp; + int sec_per_pg, sec_per_pl, pg_per_blk; if (nvme_nvm_id->cgrps != 1) return -EINVAL; src = &nvme_nvm_id->groups[0]; - dst = &nvm_id->grp; - - dst->mtype = src->mtype; - dst->fmtype = src->fmtype; - dst->num_ch = src->num_ch; - dst->num_lun = src->num_lun; - dst->num_pln = src->num_pln; - - dst->num_pg = le16_to_cpu(src->num_pg); - dst->num_blk = le16_to_cpu(src->num_blk); - dst->fpg_sz = le16_to_cpu(src->fpg_sz); - dst->csecs = le16_to_cpu(src->csecs); - dst->sos = le16_to_cpu(src->sos); - - dst->trdt = le32_to_cpu(src->trdt); - dst->trdm = le32_to_cpu(src->trdm); - dst->tprt = le32_to_cpu(src->tprt); - dst->tprm = le32_to_cpu(src->tprm); - dst->tbet = le32_to_cpu(src->tbet); - dst->tbem = le32_to_cpu(src->tbem); - dst->mpos = le32_to_cpu(src->mpos); - dst->mccap = le32_to_cpu(src->mccap); - - dst->cpar = le16_to_cpu(src->cpar); - - if (dst->fmtype == NVM_ID_FMTYPE_MLC) { - memcpy(dst->lptbl.id, src->lptbl.id, 8); - dst->lptbl.mlc.num_pairs = - le16_to_cpu(src->lptbl.mlc.num_pairs); - - if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) { - pr_err("nvm: number of MLC pairs not supported\n"); - return -EINVAL; - } + grp = &nvm_id->grp; + + grp->mtype = src->mtype; + grp->fmtype = src->fmtype; + + grp->num_ch = src->num_ch; + grp->num_lun = src->num_lun; + + grp->num_chk = le16_to_cpu(src->num_chk); + grp->csecs = le16_to_cpu(src->csecs); + grp->sos = le16_to_cpu(src->sos); + + pg_per_blk = le16_to_cpu(src->num_pg); + sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs; + sec_per_pl = sec_per_pg * src->num_pln; + grp->clba = sec_per_pl * pg_per_blk; + grp->ws_per_chk = pg_per_blk; - memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs, - dst->lptbl.mlc.num_pairs); + grp->mpos = le32_to_cpu(src->mpos); + grp->cpar = le16_to_cpu(src->cpar); + grp->mccap = le32_to_cpu(src->mccap); + + grp->ws_opt = grp->ws_min = sec_per_pg; + grp->ws_seq = NVM_IO_SNGL_ACCESS; + + if (grp->mpos & 0x020202) { + grp->ws_seq = NVM_IO_DUAL_ACCESS; + grp->ws_opt <<= 1; + } else if (grp->mpos & 0x040404) { + grp->ws_seq = NVM_IO_QUAD_ACCESS; + grp->ws_opt <<= 2; } + grp->trdt = le32_to_cpu(src->trdt); + grp->trdm = le32_to_cpu(src->trdm); + grp->tprt = le32_to_cpu(src->tprt); + grp->tprm = le32_to_cpu(src->tprm); + grp->tbet = le32_to_cpu(src->tbet); + grp->tbem = le32_to_cpu(src->tbem); + + /* 1.2 compatibility */ + grp->num_pln = src->num_pln; + grp->num_pg = le16_to_cpu(src->num_pg); + grp->fpg_sz = le16_to_cpu(src->fpg_sz); + return 0; } @@ -332,62 +305,6 @@ out: return ret; } -static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb, - nvm_l2p_update_fn *update_l2p, void *priv) -{ - struct nvme_ns *ns = nvmdev->q->queuedata; - struct nvme_nvm_command c = {}; - u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9; - u32 nlb_pr_rq = len / sizeof(u64); - u64 cmd_slba = slba; - void *entries; - int ret = 0; - - c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl; - c.l2p.nsid = cpu_to_le32(ns->head->ns_id); - entries = kmalloc(len, GFP_KERNEL); - if (!entries) - return -ENOMEM; - - while (nlb) { - u32 cmd_nlb = min(nlb_pr_rq, nlb); - u64 elba = slba + cmd_nlb; - - c.l2p.slba = cpu_to_le64(cmd_slba); - c.l2p.nlb = cpu_to_le32(cmd_nlb); - - ret = nvme_submit_sync_cmd(ns->ctrl->admin_q, - (struct nvme_command *)&c, entries, len); - if (ret) { - dev_err(ns->ctrl->device, - "L2P table transfer failed (%d)\n", ret); - ret = -EIO; - goto out; - } - - if (unlikely(elba > nvmdev->total_secs)) { - pr_err("nvm: L2P data from device is out of bounds!\n"); - ret = -EINVAL; - goto out; - } - - /* Transform physical address to target address space */ - nvm_part_to_tgt(nvmdev, entries, cmd_nlb); - - if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) { - ret = -EINTR; - goto out; - } - - cmd_slba += cmd_nlb; - nlb -= cmd_nlb; - } - -out: - kfree(entries); - return ret; -} - static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, u8 *blks) { @@ -397,7 +314,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_nvm_command c = {}; struct nvme_nvm_bb_tbl *bb_tbl; - int nr_blks = geo->blks_per_lun * geo->plane_mode; + int nr_blks = geo->nr_chks * geo->plane_mode; int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks; int ret = 0; @@ -438,7 +355,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa, goto out; } - memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode); + memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode); out: kfree(bb_tbl); return ret; @@ -474,10 +391,6 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns, c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list); c->ph_rw.control = cpu_to_le16(rqd->flags); c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1); - - if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD) - c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns, - rqd->bio->bi_iter.bi_sector)); } static void nvme_nvm_end_io(struct request *rq, blk_status_t status) @@ -597,8 +510,6 @@ static void nvme_nvm_dev_dma_free(void *pool, void *addr, static struct nvm_dev_ops nvme_nvm_dev_ops = { .identity = nvme_nvm_identity, - .get_l2p_tbl = nvme_nvm_get_l2p_tbl, - .get_bb_tbl = nvme_nvm_get_bb_tbl, .set_bb_tbl = nvme_nvm_set_bb_tbl, @@ -883,7 +794,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev, } else if (strcmp(attr->name, "num_planes") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln); } else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */ - return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk); + return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk); } else if (strcmp(attr->name, "num_pages") == 0) { return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg); } else if (strcmp(attr->name, "page_size") == 0) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 1218a9fca846..3b211d9e58b8 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -33,51 +33,11 @@ void nvme_failover_req(struct request *req) kblockd_schedule_work(&ns->head->requeue_work); } -bool nvme_req_needs_failover(struct request *req) +bool nvme_req_needs_failover(struct request *req, blk_status_t error) { if (!(req->cmd_flags & REQ_NVME_MPATH)) return false; - - switch (nvme_req(req)->status & 0x7ff) { - /* - * Generic command status: - */ - case NVME_SC_INVALID_OPCODE: - case NVME_SC_INVALID_FIELD: - case NVME_SC_INVALID_NS: - case NVME_SC_LBA_RANGE: - case NVME_SC_CAP_EXCEEDED: - case NVME_SC_RESERVATION_CONFLICT: - return false; - - /* - * I/O command set specific error. Unfortunately these values are - * reused for fabrics commands, but those should never get here. - */ - case NVME_SC_BAD_ATTRIBUTES: - case NVME_SC_INVALID_PI: - case NVME_SC_READ_ONLY: - case NVME_SC_ONCS_NOT_SUPPORTED: - WARN_ON_ONCE(nvme_req(req)->cmd->common.opcode == - nvme_fabrics_command); - return false; - - /* - * Media and Data Integrity Errors: - */ - case NVME_SC_WRITE_FAULT: - case NVME_SC_READ_ERROR: - case NVME_SC_GUARD_CHECK: - case NVME_SC_APPTAG_CHECK: - case NVME_SC_REFTAG_CHECK: - case NVME_SC_COMPARE_FAILED: - case NVME_SC_ACCESS_DENIED: - case NVME_SC_UNWRITTEN_BLOCK: - return false; - } - - /* Everything else could be a path failure, so should be retried */ - return true; + return blk_path_error(error); } void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a00eabd06427..8e4550fa08f8 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -32,6 +32,8 @@ extern unsigned int admin_timeout; #define NVME_KATO_GRACE 10 extern struct workqueue_struct *nvme_wq; +extern struct workqueue_struct *nvme_reset_wq; +extern struct workqueue_struct *nvme_delete_wq; enum { NVME_NS_LBA = 0, @@ -119,6 +121,7 @@ static inline struct nvme_request *nvme_req(struct request *req) enum nvme_ctrl_state { NVME_CTRL_NEW, NVME_CTRL_LIVE, + NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */ NVME_CTRL_RESETTING, NVME_CTRL_RECONNECTING, NVME_CTRL_DELETING, @@ -393,6 +396,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); +int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); @@ -401,7 +405,7 @@ extern const struct block_device_operations nvme_ns_head_ops; #ifdef CONFIG_NVME_MULTIPATH void nvme_failover_req(struct request *req); -bool nvme_req_needs_failover(struct request *req); +bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns_head *head); @@ -430,7 +434,8 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline void nvme_failover_req(struct request *req) { } -static inline bool nvme_req_needs_failover(struct request *req) +static inline bool nvme_req_needs_failover(struct request *req, + blk_status_t error) { return false; } diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4276ebfff22b..6fe7af00a1f4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -75,7 +75,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); * Represents an NVM Express device. Each nvme_dev is a PCI function. */ struct nvme_dev { - struct nvme_queue **queues; + struct nvme_queue *queues; struct blk_mq_tag_set tagset; struct blk_mq_tag_set admin_tagset; u32 __iomem *dbs; @@ -365,7 +365,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |