summaryrefslogtreecommitdiffstats
path: root/drivers/nvme
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
commit0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
treecefccd67dc1f27bb45830f6b8065dd4a1c05e83b /drivers/nvme
parent9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent796baeeef85a40b3495a907fb7425086e7010102 (diff)
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains: - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo. - Support for SMR zones for deadline and mq-deadline from Damien and Christoph. - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly. - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and the removing rrpc 1.2 in preparation for supporting 2.0. - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes. - Support for blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping. - Series from Mike Snitzer that enables dm to register its queue more logically, something that's alwways been problematic on dm since it's a stacked device. - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs. - Various fixes from Ming closing up holes around queue mapping and quiescing. - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions. - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it to reused at the wrong time. - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me. - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me. - sgl_alloc/free helpers from Bart. - Heavily contended tag case scalability improvement from me. - Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself" * 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits) block: remove smart1,2.h nvme: add tracepoint for nvme_complete_rq nvme: add tracepoint for nvme_setup_cmd nvme-pci: introduce RECONNECTING state to mark initializing procedure nvme-rdma: remove redundant boolean for inline_data nvme: don't free uuid pointer before printing it nvme-pci: Suspend queues after deleting them bsg: use pr_debug instead of hand crafted macros blk-mq-debugfs: don't allow write on attributes with seq_operations set nvme-pci: Fix queue double allocations block: Set BIO_TRACE_COMPLETION on new bio during split blk-throttle: use queue_is_rq_based block: Remove kblockd_schedule_delayed_work{,_on}() blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() lib/scatterlist: Fix chaining support in sgl_alloc_order() blk-throttle: track read and write request individually block: add bdev_read_only() checks to common helpers block: fail op_is_write() requests to read-only partitions blk-throttle: export io_serviced_recursive, io_service_bytes_recursive ...
Diffstat (limited to 'drivers/nvme')
-rw-r--r--drivers/nvme/host/Makefile4
-rw-r--r--drivers/nvme/host/core.c134
-rw-r--r--drivers/nvme/host/fabrics.c22
-rw-r--r--drivers/nvme/host/fabrics.h2
-rw-r--r--drivers/nvme/host/fc.c7
-rw-r--r--drivers/nvme/host/lightnvm.c185
-rw-r--r--drivers/nvme/host/multipath.c44
-rw-r--r--drivers/nvme/host/nvme.h9
-rw-r--r--drivers/nvme/host/pci.c216
-rw-r--r--drivers/nvme/host/rdma.c6
-rw-r--r--drivers/nvme/host/trace.c130
-rw-r--r--drivers/nvme/host/trace.h165
-rw-r--r--drivers/nvme/target/Kconfig2
-rw-r--r--drivers/nvme/target/core.c14
-rw-r--r--drivers/nvme/target/fabrics-cmd.c2
-rw-r--r--drivers/nvme/target/fc.c60
-rw-r--r--drivers/nvme/target/fcloop.c244
-rw-r--r--drivers/nvme/target/loop.c3
-rw-r--r--drivers/nvme/target/rdma.c83
19 files changed, 833 insertions, 499 deletions
diff --git a/drivers/nvme/host/Makefile b/drivers/nvme/host/Makefile
index a25fd43650ad..441e67e3a9d7 100644
--- a/drivers/nvme/host/Makefile
+++ b/drivers/nvme/host/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y += -I$(src)
+
obj-$(CONFIG_NVME_CORE) += nvme-core.o
obj-$(CONFIG_BLK_DEV_NVME) += nvme.o
obj-$(CONFIG_NVME_FABRICS) += nvme-fabrics.o
@@ -6,6 +9,7 @@ obj-$(CONFIG_NVME_RDMA) += nvme-rdma.o
obj-$(CONFIG_NVME_FC) += nvme-fc.o
nvme-core-y := core.o
+nvme-core-$(CONFIG_TRACING) += trace.o
nvme-core-$(CONFIG_NVME_MULTIPATH) += multipath.o
nvme-core-$(CONFIG_NVM) += lightnvm.o
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 839650e0926a..e8104871cbbf 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -29,6 +29,9 @@
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
#include "nvme.h"
#include "fabrics.h"
@@ -65,9 +68,26 @@ static bool streams;
module_param(streams, bool, 0644);
MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
+/*
+ * nvme_wq - hosts nvme related works that are not reset or delete
+ * nvme_reset_wq - hosts nvme reset works
+ * nvme_delete_wq - hosts nvme delete works
+ *
+ * nvme_wq will host works such are scan, aen handling, fw activation,
+ * keep-alive error recovery, periodic reconnects etc. nvme_reset_wq
+ * runs reset works which also flush works hosted on nvme_wq for
+ * serialization purposes. nvme_delete_wq host controller deletion
+ * works which flush reset works for serialization.
+ */
struct workqueue_struct *nvme_wq;
EXPORT_SYMBOL_GPL(nvme_wq);
+struct workqueue_struct *nvme_reset_wq;
+EXPORT_SYMBOL_GPL(nvme_reset_wq);
+
+struct workqueue_struct *nvme_delete_wq;
+EXPORT_SYMBOL_GPL(nvme_delete_wq);
+
static DEFINE_IDA(nvme_subsystems_ida);
static LIST_HEAD(nvme_subsystems);
static DEFINE_MUTEX(nvme_subsystems_lock);
@@ -89,13 +109,13 @@ int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
return -EBUSY;
- if (!queue_work(nvme_wq, &ctrl->reset_work))
+ if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
return -EBUSY;
return 0;
}
EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
-static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
{
int ret;
@@ -104,6 +124,7 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
flush_work(&ctrl->reset_work);
return ret;
}
+EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
static void nvme_delete_ctrl_work(struct work_struct *work)
{
@@ -122,7 +143,7 @@ int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
return -EBUSY;
- if (!queue_work(nvme_wq, &ctrl->delete_work))
+ if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
}
@@ -157,13 +178,20 @@ static blk_status_t nvme_error_status(struct request *req)
return BLK_STS_OK;
case NVME_SC_CAP_EXCEEDED:
return BLK_STS_NOSPC;
+ case NVME_SC_LBA_RANGE:
+ return BLK_STS_TARGET;
+ case NVME_SC_BAD_ATTRIBUTES:
case NVME_SC_ONCS_NOT_SUPPORTED:
+ case NVME_SC_INVALID_OPCODE:
+ case NVME_SC_INVALID_FIELD:
+ case NVME_SC_INVALID_NS:
return BLK_STS_NOTSUPP;
case NVME_SC_WRITE_FAULT:
case NVME_SC_READ_ERROR:
case NVME_SC_UNWRITTEN_BLOCK:
case NVME_SC_ACCESS_DENIED:
case NVME_SC_READ_ONLY:
+ case NVME_SC_COMPARE_FAILED:
return BLK_STS_MEDIUM;
case NVME_SC_GUARD_CHECK:
case NVME_SC_APPTAG_CHECK:
@@ -190,8 +218,12 @@ static inline bool nvme_req_needs_retry(struct request *req)
void nvme_complete_rq(struct request *req)
{
- if (unlikely(nvme_req(req)->status && nvme_req_needs_retry(req))) {
- if (nvme_req_needs_failover(req)) {
+ blk_status_t status = nvme_error_status(req);
+
+ trace_nvme_complete_rq(req);
+
+ if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
+ if (nvme_req_needs_failover(req, status)) {
nvme_failover_req(req);
return;
}
@@ -202,8 +234,7 @@ void nvme_complete_rq(struct request *req)
return;
}
}
-
- blk_mq_end_request(req, nvme_error_status(req));
+ blk_mq_end_request(req, status);
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
@@ -232,6 +263,15 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
old_state = ctrl->state;
switch (new_state) {
+ case NVME_CTRL_ADMIN_ONLY:
+ switch (old_state) {
+ case NVME_CTRL_RECONNECTING:
+ changed = true;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_LIVE:
switch (old_state) {
case NVME_CTRL_NEW:
@@ -247,6 +287,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
switch (old_state) {
case NVME_CTRL_NEW:
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
changed = true;
/* FALLTHRU */
default:
@@ -266,6 +307,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_DELETING:
switch (old_state) {
case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
case NVME_CTRL_RESETTING:
case NVME_CTRL_RECONNECTING:
changed = true;
@@ -591,6 +633,10 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
}
cmd->common.command_id = req->tag;
+ if (ns)
+ trace_nvme_setup_nvm_cmd(req->q->id, cmd);
+ else
+ trace_nvme_setup_admin_cmd(cmd);
return ret;
}
EXPORT_SYMBOL_GPL(nvme_setup_cmd);
@@ -1217,16 +1263,27 @@ static int nvme_open(struct block_device *bdev, fmode_t mode)
#ifdef CONFIG_NVME_MULTIPATH
/* should never be called due to GENHD_FL_HIDDEN */
if (WARN_ON_ONCE(ns->head->disk))
- return -ENXIO;
+ goto fail;
#endif
if (!kref_get_unless_zero(&ns->kref))
- return -ENXIO;
+ goto fail;
+ if (!try_module_get(ns->ctrl->ops->module))
+ goto fail_put_ns;
+
return 0;
+
+fail_put_ns:
+ nvme_put_ns(ns);
+fail:
+ return -ENXIO;
}
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
- nvme_put_ns(disk->private_data);
+ struct nvme_ns *ns = disk->private_data;
+
+ module_put(ns->ctrl->ops->module);
+ nvme_put_ns(ns);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -2052,6 +2109,22 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = {
NULL,
};
+static int nvme_active_ctrls(struct nvme_subsystem *subsys)
+{
+ int count = 0;
+ struct nvme_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->state != NVME_CTRL_DELETING &&
+ ctrl->state != NVME_CTRL_DEAD)
+ count++;
+ }
+ mutex_unlock(&subsys->lock);
+
+ return count;
+}
+
static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
struct nvme_subsystem *subsys, *found;
@@ -2090,7 +2163,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
- if (!(id->cmic & (1 << 1))) {
+ if (nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
found->subnqn);
@@ -2257,7 +2330,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
shutdown_timeout, 60);
if (ctrl->shutdown_timeout != shutdown_timeout)
- dev_warn(ctrl->device,
+ dev_info(ctrl->device,
"Shutdown timeout set to %u seconds\n",
ctrl->shutdown_timeout);
} else
@@ -2341,8 +2414,14 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
struct nvme_ctrl *ctrl =
container_of(inode->i_cdev, struct nvme_ctrl, cdev);
- if (ctrl->state != NVME_CTRL_LIVE)
+ switch (ctrl->state) {
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_ADMIN_ONLY:
+ break;
+ default:
return -EWOULDBLOCK;
+ }
+
file->private_data = ctrl;
return 0;
}
@@ -2606,6 +2685,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
static const char *const state_name[] = {
[NVME_CTRL_NEW] = "new",
[NVME_CTRL_LIVE] = "live",
+ [NVME_CTRL_ADMIN_ONLY] = "only-admin",
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_RECONNECTING]= "reconnecting",
[NVME_CTRL_DELETING] = "deleting",
@@ -3079,6 +3159,8 @@ static void nvme_scan_work(struct work_struct *work)
if (ctrl->state != NVME_CTRL_LIVE)
return;
+ WARN_ON_ONCE(!ctrl->tagset);
+
if (nvme_identify_ctrl(ctrl, &id))
return;
@@ -3099,8 +3181,7 @@ static void nvme_scan_work(struct work_struct *work)
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
/*
- * Do not queue new scan work when a controller is reset during
- * removal.
+ * Only new queue scan work when admin and IO queues are both alive
*/
if (ctrl->state == NVME_CTRL_LIVE)
queue_work(nvme_wq, &ctrl->scan_work);
@@ -3477,16 +3558,26 @@ EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
int __init nvme_core_init(void)
{
- int result;
+ int result = -ENOMEM;
nvme_wq = alloc_workqueue("nvme-wq",
WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
if (!nvme_wq)
- return -ENOMEM;
+ goto out;
+
+ nvme_reset_wq = alloc_workqueue("nvme-reset-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_reset_wq)
+ goto destroy_wq;
+
+ nvme_delete_wq = alloc_workqueue("nvme-delete-wq",
+ WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+ if (!nvme_delete_wq)
+ goto destroy_reset_wq;
result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
if (result < 0)
- goto destroy_wq;
+ goto destroy_delete_wq;
nvme_class = class_create(THIS_MODULE, "nvme");
if (IS_ERR(nvme_class)) {
@@ -3505,8 +3596,13 @@ destroy_class:
class_destroy(nvme_class);
unregister_chrdev:
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+destroy_delete_wq:
+ destroy_workqueue(nvme_delete_wq);
+destroy_reset_wq:
+ destroy_workqueue(nvme_reset_wq);
destroy_wq:
destroy_workqueue(nvme_wq);
+out:
return result;
}
@@ -3516,6 +3612,8 @@ void nvme_core_exit(void)
class_destroy(nvme_subsys_class);
class_destroy(nvme_class);
unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
+ destroy_workqueue(nvme_delete_wq);
+ destroy_workqueue(nvme_reset_wq);
destroy_workqueue(nvme_wq);
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 894c2ccb3891..5dd4ceefed8f 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -493,7 +493,7 @@ EXPORT_SYMBOL_GPL(nvmf_should_reconnect);
*/
int nvmf_register_transport(struct nvmf_transport_ops *ops)
{
- if (!ops->create_ctrl)
+ if (!ops->create_ctrl || !ops->module)
return -EINVAL;
down_write(&nvmf_transports_rwsem);
@@ -739,11 +739,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
ret = -ENOMEM;
goto out;
}
- if (uuid_parse(p, &hostid)) {
+ ret = uuid_parse(p, &hostid);
+ if (ret) {
pr_err("Invalid hostid %s\n", p);
ret = -EINVAL;
+ kfree(p);
goto out;
}
+ kfree(p);
break;
case NVMF_OPT_DUP_CONNECT:
opts->duplicate_connect = true;
@@ -869,32 +872,41 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
goto out_unlock;
}
+ if (!try_module_get(ops->module)) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+
ret = nvmf_check_required_opts(opts, ops->required_opts);
if (ret)
- goto out_unlock;
+ goto out_module_put;
ret = nvmf_check_allowed_opts(opts, NVMF_ALLOWED_OPTS |
ops->allowed_opts | ops->required_opts);
if (ret)
- goto out_unlock;
+ goto out_module_put;
ctrl = ops->create_ctrl(dev, opts);
if (IS_ERR(ctrl)) {
ret = PTR_ERR(ctrl);
- goto out_unlock;
+ goto out_module_put;
}
if (strcmp(ctrl->subsys->subnqn, opts->subsysnqn)) {
dev_warn(ctrl->device,
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subsys->subnqn);
+ module_put(ops->module);
up_read(&nvmf_transports_rwsem);
nvme_delete_ctrl_sync(ctrl);
return ERR_PTR(-EINVAL);
}
+ module_put(ops->module);
up_read(&nvmf_transports_rwsem);
return ctrl;
+out_module_put:
+ module_put(ops->module);
out_unlock:
up_read(&nvmf_transports_rwsem);
out_free_opts:
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 9ba614953607..25b19f722f5b 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -108,6 +108,7 @@ struct nvmf_ctrl_options {
* fabric implementation of NVMe fabrics.
* @entry: Used by the fabrics library to add the new
* registration entry to its linked-list internal tree.
+ * @module: Transport module reference
* @name: Name of the NVMe fabric driver implementation.
* @required_opts: sysfs command-line options that must be specified
* when adding a new NVMe controller.
@@ -126,6 +127,7 @@ struct nvmf_ctrl_options {
*/
struct nvmf_transport_ops {
struct list_head entry;
+ struct module *module;
const char *name;
int required_opts;
int allowed_opts;
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 794e66e4aa20..99bf51c7e513 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2921,6 +2921,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
nvme_fc_free_queue(&ctrl->queues[0]);
+ /* re-enable the admin_q so anything new can fast fail */
+ blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+
nvme_fc_ctlr_inactive_on_rport(ctrl);
}
@@ -2935,6 +2938,9 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
* waiting for io to terminate
*/
nvme_fc_delete_association(ctrl);
+
+ /* resume the io queues so that things will fast fail */
+ nvme_start_queues(nctrl);
}
static void
@@ -3380,6 +3386,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
static struct nvmf_transport_ops nvme_fc_transport = {
.name = "fc",
+ .module = THIS_MODULE,
.required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
.allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
.create_ctrl = nvme_fc_create_ctrl,
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ba3d7f3349e5..50ef71ee3d86 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -31,27 +31,10 @@
enum nvme_nvm_admin_opcode {
nvme_nvm_admin_identity = 0xe2,
- nvme_nvm_admin_get_l2p_tbl = 0xea,
nvme_nvm_admin_get_bb_tbl = 0xf2,
nvme_nvm_admin_set_bb_tbl = 0xf1,
};
-struct nvme_nvm_hb_rw {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __u64 rsvd2;
- __le64 metadata;
- __le64 prp1;
- __le64 prp2;
- __le64 spba;
- __le16 length;
- __le16 control;
- __le32 dsmgmt;
- __le64 slba;
-};
-
struct nvme_nvm_ph_rw {
__u8 opcode;
__u8 flags;
@@ -80,19 +63,6 @@ struct nvme_nvm_identity {
__u32 rsvd11[5];
};
-struct nvme_nvm_l2ptbl {
- __u8 opcode;
- __u8 flags;
- __u16 command_id;
- __le32 nsid;
- __le32 cdw2[4];
- __le64 prp1;
- __le64 prp2;
- __le64 slba;
- __le32 nlb;
- __le16 cdw14[6];
-};
-
struct nvme_nvm_getbbtbl {
__u8 opcode;
__u8 flags;
@@ -139,9 +109,7 @@ struct nvme_nvm_command {
union {
struct nvme_common_command common;
struct nvme_nvm_identity identity;
- struct nvme_nvm_hb_rw hb_rw;
struct nvme_nvm_ph_rw ph_rw;
- struct nvme_nvm_l2ptbl l2p;
struct nvme_nvm_getbbtbl get_bb;
struct nvme_nvm_setbbtbl set_bb;
struct nvme_nvm_erase_blk erase;
@@ -167,7 +135,7 @@ struct nvme_nvm_id_group {
__u8 num_lun;
__u8 num_pln;
__u8 rsvd1;
- __le16 num_blk;
+ __le16 num_chk;
__le16 num_pg;
__le16 fpg_sz;
__le16 csecs;
@@ -234,11 +202,9 @@ struct nvme_nvm_bb_tbl {
static inline void _nvme_nvm_check_size(void)
{
BUILD_BUG_ON(sizeof(struct nvme_nvm_identity) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_hb_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_ph_rw) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_getbbtbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_setbbtbl) != 64);
- BUILD_BUG_ON(sizeof(struct nvme_nvm_l2ptbl) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_erase_blk) != 64);
BUILD_BUG_ON(sizeof(struct nvme_nvm_id_group) != 960);
BUILD_BUG_ON(sizeof(struct nvme_nvm_addr_format) != 16);
@@ -249,51 +215,58 @@ static inline void _nvme_nvm_check_size(void)
static int init_grps(struct nvm_id *nvm_id, struct nvme_nvm_id *nvme_nvm_id)
{
struct nvme_nvm_id_group *src;
- struct nvm_id_group *dst;
+ struct nvm_id_group *grp;
+ int sec_per_pg, sec_per_pl, pg_per_blk;
if (nvme_nvm_id->cgrps != 1)
return -EINVAL;
src = &nvme_nvm_id->groups[0];
- dst = &nvm_id->grp;
-
- dst->mtype = src->mtype;
- dst->fmtype = src->fmtype;
- dst->num_ch = src->num_ch;
- dst->num_lun = src->num_lun;
- dst->num_pln = src->num_pln;
-
- dst->num_pg = le16_to_cpu(src->num_pg);
- dst->num_blk = le16_to_cpu(src->num_blk);
- dst->fpg_sz = le16_to_cpu(src->fpg_sz);
- dst->csecs = le16_to_cpu(src->csecs);
- dst->sos = le16_to_cpu(src->sos);
-
- dst->trdt = le32_to_cpu(src->trdt);
- dst->trdm = le32_to_cpu(src->trdm);
- dst->tprt = le32_to_cpu(src->tprt);
- dst->tprm = le32_to_cpu(src->tprm);
- dst->tbet = le32_to_cpu(src->tbet);
- dst->tbem = le32_to_cpu(src->tbem);
- dst->mpos = le32_to_cpu(src->mpos);
- dst->mccap = le32_to_cpu(src->mccap);
-
- dst->cpar = le16_to_cpu(src->cpar);
-
- if (dst->fmtype == NVM_ID_FMTYPE_MLC) {
- memcpy(dst->lptbl.id, src->lptbl.id, 8);
- dst->lptbl.mlc.num_pairs =
- le16_to_cpu(src->lptbl.mlc.num_pairs);
-
- if (dst->lptbl.mlc.num_pairs > NVME_NVM_LP_MLC_PAIRS) {
- pr_err("nvm: number of MLC pairs not supported\n");
- return -EINVAL;
- }
+ grp = &nvm_id->grp;
+
+ grp->mtype = src->mtype;
+ grp->fmtype = src->fmtype;
+
+ grp->num_ch = src->num_ch;
+ grp->num_lun = src->num_lun;
+
+ grp->num_chk = le16_to_cpu(src->num_chk);
+ grp->csecs = le16_to_cpu(src->csecs);
+ grp->sos = le16_to_cpu(src->sos);
+
+ pg_per_blk = le16_to_cpu(src->num_pg);
+ sec_per_pg = le16_to_cpu(src->fpg_sz) / grp->csecs;
+ sec_per_pl = sec_per_pg * src->num_pln;
+ grp->clba = sec_per_pl * pg_per_blk;
+ grp->ws_per_chk = pg_per_blk;
- memcpy(dst->lptbl.mlc.pairs, src->lptbl.mlc.pairs,
- dst->lptbl.mlc.num_pairs);
+ grp->mpos = le32_to_cpu(src->mpos);
+ grp->cpar = le16_to_cpu(src->cpar);
+ grp->mccap = le32_to_cpu(src->mccap);
+
+ grp->ws_opt = grp->ws_min = sec_per_pg;
+ grp->ws_seq = NVM_IO_SNGL_ACCESS;
+
+ if (grp->mpos & 0x020202) {
+ grp->ws_seq = NVM_IO_DUAL_ACCESS;
+ grp->ws_opt <<= 1;
+ } else if (grp->mpos & 0x040404) {
+ grp->ws_seq = NVM_IO_QUAD_ACCESS;
+ grp->ws_opt <<= 2;
}
+ grp->trdt = le32_to_cpu(src->trdt);
+ grp->trdm = le32_to_cpu(src->trdm);
+ grp->tprt = le32_to_cpu(src->tprt);
+ grp->tprm = le32_to_cpu(src->tprm);
+ grp->tbet = le32_to_cpu(src->tbet);
+ grp->tbem = le32_to_cpu(src->tbem);
+
+ /* 1.2 compatibility */
+ grp->num_pln = src->num_pln;
+ grp->num_pg = le16_to_cpu(src->num_pg);
+ grp->fpg_sz = le16_to_cpu(src->fpg_sz);
+
return 0;
}
@@ -332,62 +305,6 @@ out:
return ret;
}
-static int nvme_nvm_get_l2p_tbl(struct nvm_dev *nvmdev, u64 slba, u32 nlb,
- nvm_l2p_update_fn *update_l2p, void *priv)
-{
- struct nvme_ns *ns = nvmdev->q->queuedata;
- struct nvme_nvm_command c = {};
- u32 len = queue_max_hw_sectors(ns->ctrl->admin_q) << 9;
- u32 nlb_pr_rq = len / sizeof(u64);
- u64 cmd_slba = slba;
- void *entries;
- int ret = 0;
-
- c.l2p.opcode = nvme_nvm_admin_get_l2p_tbl;
- c.l2p.nsid = cpu_to_le32(ns->head->ns_id);
- entries = kmalloc(len, GFP_KERNEL);
- if (!entries)
- return -ENOMEM;
-
- while (nlb) {
- u32 cmd_nlb = min(nlb_pr_rq, nlb);
- u64 elba = slba + cmd_nlb;
-
- c.l2p.slba = cpu_to_le64(cmd_slba);
- c.l2p.nlb = cpu_to_le32(cmd_nlb);
-
- ret = nvme_submit_sync_cmd(ns->ctrl->admin_q,
- (struct nvme_command *)&c, entries, len);
- if (ret) {
- dev_err(ns->ctrl->device,
- "L2P table transfer failed (%d)\n", ret);
- ret = -EIO;
- goto out;
- }
-
- if (unlikely(elba > nvmdev->total_secs)) {
- pr_err("nvm: L2P data from device is out of bounds!\n");
- ret = -EINVAL;
- goto out;
- }
-
- /* Transform physical address to target address space */
- nvm_part_to_tgt(nvmdev, entries, cmd_nlb);
-
- if (update_l2p(cmd_slba, cmd_nlb, entries, priv)) {
- ret = -EINTR;
- goto out;
- }
-
- cmd_slba += cmd_nlb;
- nlb -= cmd_nlb;
- }
-
-out:
- kfree(entries);
- return ret;
-}
-
static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
u8 *blks)
{
@@ -397,7 +314,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
struct nvme_ctrl *ctrl = ns->ctrl;
struct nvme_nvm_command c = {};
struct nvme_nvm_bb_tbl *bb_tbl;
- int nr_blks = geo->blks_per_lun * geo->plane_mode;
+ int nr_blks = geo->nr_chks * geo->plane_mode;
int tblsz = sizeof(struct nvme_nvm_bb_tbl) + nr_blks;
int ret = 0;
@@ -438,7 +355,7 @@ static int nvme_nvm_get_bb_tbl(struct nvm_dev *nvmdev, struct ppa_addr ppa,
goto out;
}
- memcpy(blks, bb_tbl->blk, geo->blks_per_lun * geo->plane_mode);
+ memcpy(blks, bb_tbl->blk, geo->nr_chks * geo->plane_mode);
out:
kfree(bb_tbl);
return ret;
@@ -474,10 +391,6 @@ static inline void nvme_nvm_rqtocmd(struct nvm_rq *rqd, struct nvme_ns *ns,
c->ph_rw.metadata = cpu_to_le64(rqd->dma_meta_list);
c->ph_rw.control = cpu_to_le16(rqd->flags);
c->ph_rw.length = cpu_to_le16(rqd->nr_ppas - 1);
-
- if (rqd->opcode == NVM_OP_HBWRITE || rqd->opcode == NVM_OP_HBREAD)
- c->hb_rw.slba = cpu_to_le64(nvme_block_nr(ns,
- rqd->bio->bi_iter.bi_sector));
}
static void nvme_nvm_end_io(struct request *rq, blk_status_t status)
@@ -597,8 +510,6 @@ static void nvme_nvm_dev_dma_free(void *pool, void *addr,
static struct nvm_dev_ops nvme_nvm_dev_ops = {
.identity = nvme_nvm_identity,
- .get_l2p_tbl = nvme_nvm_get_l2p_tbl,
-
.get_bb_tbl = nvme_nvm_get_bb_tbl,
.set_bb_tbl = nvme_nvm_set_bb_tbl,
@@ -883,7 +794,7 @@ static ssize_t nvm_dev_attr_show(struct device *dev,
} else if (strcmp(attr->name, "num_planes") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pln);
} else if (strcmp(attr->name, "num_blocks") == 0) { /* u16 */
- return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_blk);
+ return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_chk);
} else if (strcmp(attr->name, "num_pages") == 0) {
return scnprintf(page, PAGE_SIZE, "%u\n", grp->num_pg);
} else if (strcmp(attr->name, "page_size") == 0) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 1218a9fca846..3b211d9e58b8 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -33,51 +33,11 @@ void nvme_failover_req(struct request *req)
kblockd_schedule_work(&ns->head->requeue_work);
}
-bool nvme_req_needs_failover(struct request *req)
+bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
if (!(req->cmd_flags & REQ_NVME_MPATH))
return false;
-
- switch (nvme_req(req)->status & 0x7ff) {
- /*
- * Generic command status:
- */
- case NVME_SC_INVALID_OPCODE:
- case NVME_SC_INVALID_FIELD:
- case NVME_SC_INVALID_NS:
- case NVME_SC_LBA_RANGE:
- case NVME_SC_CAP_EXCEEDED:
- case NVME_SC_RESERVATION_CONFLICT:
- return false;
-
- /*
- * I/O command set specific error. Unfortunately these values are
- * reused for fabrics commands, but those should never get here.
- */
- case NVME_SC_BAD_ATTRIBUTES:
- case NVME_SC_INVALID_PI:
- case NVME_SC_READ_ONLY:
- case NVME_SC_ONCS_NOT_SUPPORTED:
- WARN_ON_ONCE(nvme_req(req)->cmd->common.opcode ==
- nvme_fabrics_command);
- return false;
-
- /*
- * Media and Data Integrity Errors:
- */
- case NVME_SC_WRITE_FAULT:
- case NVME_SC_READ_ERROR:
- case NVME_SC_GUARD_CHECK:
- case NVME_SC_APPTAG_CHECK:
- case NVME_SC_REFTAG_CHECK:
- case NVME_SC_COMPARE_FAILED:
- case NVME_SC_ACCESS_DENIED:
- case NVME_SC_UNWRITTEN_BLOCK:
- return false;
- }
-
- /* Everything else could be a path failure, so should be retried */
- return true;
+ return blk_path_error(error);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a00eabd06427..8e4550fa08f8 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -32,6 +32,8 @@ extern unsigned int admin_timeout;
#define NVME_KATO_GRACE 10
extern struct workqueue_struct *nvme_wq;
+extern struct workqueue_struct *nvme_reset_wq;
+extern struct workqueue_struct *nvme_delete_wq;
enum {
NVME_NS_LBA = 0,
@@ -119,6 +121,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
enum nvme_ctrl_state {
NVME_CTRL_NEW,
NVME_CTRL_LIVE,
+ NVME_CTRL_ADMIN_ONLY, /* Only admin queue live */
NVME_CTRL_RESETTING,
NVME_CTRL_RECONNECTING,
NVME_CTRL_DELETING,
@@ -393,6 +396,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_start_keep_alive(struct nvme_ctrl *ctrl);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
+int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);
@@ -401,7 +405,7 @@ extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
void nvme_failover_req(struct request *req);
-bool nvme_req_needs_failover(struct request *req);
+bool nvme_req_needs_failover(struct request *req, blk_status_t error);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns_head *head);
@@ -430,7 +434,8 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
static inline void nvme_failover_req(struct request *req)
{
}
-static inline bool nvme_req_needs_failover(struct request *req)
+static inline bool nvme_req_needs_failover(struct request *req,
+ blk_status_t error)
{
return false;
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4276ebfff22b..6fe7af00a1f4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -75,7 +75,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
* Represents an NVM Express device. Each nvme_dev is a PCI function.
*/
struct nvme_dev {
- struct nvme_queue **queues;
+ struct nvme_queue *queues;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs;
@@ -365,7 +365,7 @@ static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,