summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2017-11-03 10:28:51 -0600
committerJens Axboe <axboe@kernel.dk>2017-11-03 10:28:51 -0600
commit3e2cb3ad47500ed12d4c8b4cbd737dca352e38e4 (patch)
tree613bdf98252519e5a538c386d1a3054aaaade4de /drivers
parent474f5da2354e9fd5376b968e4c5a0f1807dc19e8 (diff)
parenta806c6c81e6c0d07c8a8b2643bad4a37a196d681 (diff)
Merge branch 'nvme-4.15' of git://git.infradead.org/nvme into for-4.15/block
Pull NVMe changes from Christoph: "Below are the currently queue nvme updates for Linux 4.15. There are a few more things that could make it for this merge window, but I'd like to get things into linux-next, especially for the unlikely case that Linus decided to cut -rc8. Highlights: - support for SGLs in the PCIe driver (Chaitanya Kulkarni) - disable I/O schedulers for the admin queue (Israel Rukshin) - various Fibre Channel fixes and enhancements (James Smart) - various refactoring for better code sharing between transports (Sagi Grimberg and me) as well as lots of little bits from various contributors."
Diffstat (limited to 'drivers')
-rw-r--r--drivers/nvme/Kconfig4
-rw-r--r--drivers/nvme/host/core.c260
-rw-r--r--drivers/nvme/host/fabrics.c12
-rw-r--r--drivers/nvme/host/fabrics.h14
-rw-r--r--drivers/nvme/host/fc.c656
-rw-r--r--drivers/nvme/host/nvme.h26
-rw-r--r--drivers/nvme/host/pci.c224
-rw-r--r--drivers/nvme/host/rdma.c225
-rw-r--r--drivers/nvme/target/core.c13
-rw-r--r--drivers/nvme/target/fc.c16
-rw-r--r--drivers/nvme/target/loop.c47
-rw-r--r--drivers/nvme/target/nvmet.h2
12 files changed, 1008 insertions, 491 deletions
diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig
index b7c78a5b1f7a..04008e0bbe81 100644
--- a/drivers/nvme/Kconfig
+++ b/drivers/nvme/Kconfig
@@ -1,2 +1,6 @@
+menu "NVME Support"
+
source "drivers/nvme/host/Kconfig"
source "drivers/nvme/target/Kconfig"
+
+endmenu
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bb2aad078637..64744355aa88 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -34,13 +34,13 @@
#define NVME_MINORS (1U << MINORBITS)
-unsigned char admin_timeout = 60;
-module_param(admin_timeout, byte, 0644);
+unsigned int admin_timeout = 60;
+module_param(admin_timeout, uint, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
EXPORT_SYMBOL_GPL(admin_timeout);
-unsigned char nvme_io_timeout = 30;
-module_param_named(io_timeout, nvme_io_timeout, byte, 0644);
+unsigned int nvme_io_timeout = 30;
+module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
@@ -52,9 +52,6 @@ static u8 nvme_max_retries = 5;
module_param_named(max_retries, nvme_max_retries, byte, 0644);
MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
-static int nvme_char_major;
-module_param(nvme_char_major, int, 0);
-
static unsigned long default_ps_max_latency_us = 100000;
module_param(default_ps_max_latency_us, ulong, 0644);
MODULE_PARM_DESC(default_ps_max_latency_us,
@@ -71,9 +68,8 @@ MODULE_PARM_DESC(streams, "turn on support for Streams write directives");
struct workqueue_struct *nvme_wq;
EXPORT_SYMBOL_GPL(nvme_wq);
-static LIST_HEAD(nvme_ctrl_list);
-static DEFINE_SPINLOCK(dev_list_lock);
-
+static DEFINE_IDA(nvme_instance_ida);
+static dev_t nvme_chr_devt;
static struct class *nvme_class;
static __le32 nvme_get_log_dw10(u8 lid, size_t size)
@@ -101,6 +97,46 @@ static int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
return ret;
}
+static void nvme_delete_ctrl_work(struct work_struct *work)
+{
+ struct nvme_ctrl *ctrl =
+ container_of(work, struct nvme_ctrl, delete_work);
+
+ flush_work(&ctrl->reset_work);
+ nvme_stop_ctrl(ctrl);
+ nvme_remove_namespaces(ctrl);
+ ctrl->ops->delete_ctrl(ctrl);
+ nvme_uninit_ctrl(ctrl);
+ nvme_put_ctrl(ctrl);
+}
+
+int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
+{
+ if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
+ return -EBUSY;
+ if (!queue_work(nvme_wq, &ctrl->delete_work))
+ return -EBUSY;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
+
+int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
+{
+ int ret = 0;
+
+ /*
+ * Keep a reference until the work is flushed since ->delete_ctrl
+ * can free the controller.
+ */
+ nvme_get_ctrl(ctrl);
+ ret = nvme_delete_ctrl(ctrl);
+ if (!ret)
+ flush_work(&ctrl->delete_work);
+ nvme_put_ctrl(ctrl);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_delete_ctrl_sync);
+
static blk_status_t nvme_error_status(struct request *req)
{
switch (nvme_req(req)->status & 0x7ff) {
@@ -205,6 +241,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
case NVME_CTRL_RECONNECTING:
switch (old_state) {
case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
changed = true;
/* FALLTHRU */
default:
@@ -251,12 +288,6 @@ static void nvme_free_ns(struct kref *kref)
if (ns->ndev)
nvme_nvm_unregister(ns);
- if (ns->disk) {
- spin_lock(&dev_list_lock);
- ns->disk->private_data = NULL;
- spin_unlock(&dev_list_lock);
- }
-
put_disk(ns->disk);
ida_simple_remove(&ns->ctrl->ns_ida, ns->instance);
nvme_put_ctrl(ns->ctrl);
@@ -268,29 +299,6 @@ static void nvme_put_ns(struct nvme_ns *ns)
kref_put(&ns->kref, nvme_free_ns);
}
-static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
-{
- struct nvme_ns *ns;
-
- spin_lock(&dev_list_lock);
- ns = disk->private_data;
- if (ns) {
- if (!kref_get_unless_zero(&ns->kref))
- goto fail;
- if (!try_module_get(ns->ctrl->ops->module))
- goto fail_put_ns;
- }
- spin_unlock(&dev_list_lock);
-
- return ns;
-
-fail_put_ns:
- kref_put(&ns->kref, nvme_free_ns);
-fail:
- spin_unlock(&dev_list_lock);
- return NULL;
-}
-
struct request *nvme_alloc_request(struct request_queue *q,
struct nvme_command *cmd, unsigned int flags, int qid)
{
@@ -1052,27 +1060,18 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
}
}
-#ifdef CONFIG_COMPAT
-static int nvme_compat_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned int cmd, unsigned long arg)
-{
- return nvme_ioctl(bdev, mode, cmd, arg);
-}
-#else
-#define nvme_compat_ioctl NULL
-#endif
-
static int nvme_open(struct block_device *bdev, fmode_t mode)
{
- return nvme_get_ns_from_disk(bdev->bd_disk) ? 0 : -ENXIO;
+ struct nvme_ns *ns = bdev->bd_disk->private_data;
+
+ if (!kref_get_unless_zero(&ns->kref))
+ return -ENXIO;
+ return 0;
}
static void nvme_release(struct gendisk *disk, fmode_t mode)
{
- struct nvme_ns *ns = disk->private_data;
-
- module_put(ns->ctrl->ops->module);
- nvme_put_ns(ns);
+ nvme_put_ns(disk->private_data);
}
static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
@@ -1380,7 +1379,7 @@ EXPORT_SYMBOL_GPL(nvme_sec_submit);
static const struct block_device_operations nvme_fops = {
.owner = THIS_MODULE,
.ioctl = nvme_ioctl,
- .compat_ioctl = nvme_compat_ioctl,
+ .compat_ioctl = nvme_ioctl,
.open = nvme_open,
.release = nvme_release,
.getgeo = nvme_getgeo,
@@ -1930,33 +1929,12 @@ EXPORT_SYMBOL_GPL(nvme_init_identify);
static int nvme_dev_open(struct inode *inode, struct file *file)
{
- struct nvme_ctrl *ctrl;
- int instance = iminor(inode);
- int ret = -ENODEV;
-
- spin_lock(&dev_list_lock);
- list_for_each_entry(ctrl, &nvme_ctrl_list, node) {
- if (ctrl->instance != instance)
- continue;
-
- if (!ctrl->admin_q) {
- ret = -EWOULDBLOCK;
- break;
- }
- if (!kref_get_unless_zero(&ctrl->kref))
- break;
- file->private_data = ctrl;
- ret = 0;
- break;
- }
- spin_unlock(&dev_list_lock);
-
- return ret;
-}
+ struct nvme_ctrl *ctrl =
+ container_of(inode->i_cdev, struct nvme_ctrl, cdev);
-static int nvme_dev_release(struct inode *inode, struct file *file)
-{
- nvme_put_ctrl(file->private_data);
+ if (ctrl->state != NVME_CTRL_LIVE)
+ return -EWOULDBLOCK;
+ file->private_data = ctrl;
return 0;
}
@@ -2020,7 +1998,6 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
static const struct file_operations nvme_dev_fops = {
.owner = THIS_MODULE,
.open = nvme_dev_open,
- .release = nvme_dev_release,
.unlocked_ioctl = nvme_dev_ioctl,
.compat_ioctl = nvme_dev_ioctl,
};
@@ -2186,7 +2163,7 @@ static ssize_t nvme_sysfs_delete(struct device *dev,
struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
if (device_remove_file_self(dev, attr))
- ctrl->ops->delete_ctrl(ctrl);
+ nvme_delete_ctrl_sync(ctrl);
return count;
}
static DEVICE_ATTR(delete_controller, S_IWUSR, NULL, nvme_sysfs_delete);
@@ -2298,7 +2275,8 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->ns_id == nsid) {
- kref_get(&ns->kref);
+ if (!kref_get_unless_zero(&ns->kref))
+ continue;
ret = ns;
break;
}
@@ -2401,7 +2379,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
list_add_tail(&ns->list, &ctrl->namespaces);
mutex_unlock(&ctrl->namespaces_mutex);
- kref_get(&ctrl->kref);
+ nvme_get_ctrl(ctrl);
kfree(id);
@@ -2659,7 +2637,7 @@ static void nvme_fw_act_work(struct work_struct *work)
return;
nvme_start_queues(ctrl);
- /* read FW slot informationi to clear the AER*/
+ /* read FW slot information to clear the AER */
nvme_get_fw_slot_info(ctrl);
}
@@ -2706,35 +2684,6 @@ void nvme_queue_async_events(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_queue_async_events);
-static DEFINE_IDA(nvme_instance_ida);
-
-static int nvme_set_instance(struct nvme_ctrl *ctrl)
-{
- int instance, error;
-
- do {
- if (!ida_pre_get(&nvme_instance_ida, GFP_KERNEL))
- return -ENODEV;
-
- spin_lock(&dev_list_lock);
- error = ida_get_new(&nvme_instance_ida, &instance);
- spin_unlock(&dev_list_lock);
- } while (error == -EAGAIN);
-
- if (error)
- return -ENODEV;
-
- ctrl->instance = instance;
- return 0;
-}
-
-static void nvme_release_instance(struct nvme_ctrl *ctrl)
-{
- spin_lock(&dev_list_lock);
- ida_remove(&nvme_instance_ida, ctrl->instance);
- spin_unlock(&dev_list_lock);
-}
-
void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
{
nvme_stop_keep_alive(ctrl);
@@ -2759,31 +2708,21 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl)
{
- device_destroy(nvme_class, MKDEV(nvme_char_major, ctrl->instance));
-
- spin_lock(&dev_list_lock);
- list_del(&ctrl->node);
- spin_unlock(&dev_list_lock);
+ cdev_device_del(&ctrl->cdev, ctrl->device);
}
EXPORT_SYMBOL_GPL(nvme_uninit_ctrl);
-static void nvme_free_ctrl(struct kref *kref)
+static void nvme_free_ctrl(struct device *dev)
{
- struct nvme_ctrl *ctrl = container_of(kref, struct nvme_ctrl, kref);
+ struct nvme_ctrl *ctrl =
+ container_of(dev, struct nvme_ctrl, ctrl_device);
- put_device(ctrl->device);
- nvme_release_instance(ctrl);
+ ida_simple_remove(&nvme_instance_ida, ctrl->instance);
ida_destroy(&ctrl->ns_ida);
ctrl->ops->free_ctrl(ctrl);
}
-void nvme_put_ctrl(struct nvme_ctrl *ctrl)
-{
- kref_put(&ctrl->kref, nvme_free_ctrl);
-}
-EXPORT_SYMBOL_GPL(nvme_put_ctrl);
-
/*
* Initialize a NVMe controller structures. This needs to be called during
* earliest initialization so that we have the initialized structured around
@@ -2798,32 +2737,38 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
spin_lock_init(&ctrl->lock);
INIT_LIST_HEAD(&ctrl->namespaces);
mutex_init(&ctrl->namespaces_mutex);
- kref_init(&ctrl->kref);
ctrl->dev = dev;
ctrl->ops = ops;
ctrl->quirks = quirks;
INIT_WORK(&ctrl->scan_work, nvme_scan_work);
INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
+ INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
- ret = nvme_set_instance(ctrl);
- if (ret)
+ ret = ida_simple_get(&nvme_instance_ida, 0, 0, GFP_KERNEL);
+ if (ret < 0)
goto out;
-
- ctrl->device = device_create_with_groups(nvme_class, ctrl->dev,
- MKDEV(nvme_char_major, ctrl->instance),
- ctrl, nvme_dev_attr_groups,
- "nvme%d", ctrl->instance);
- if (IS_ERR(ctrl->device)) {
- ret = PTR_ERR(ctrl->device);
+ ctrl->instance = ret;
+
+ device_initialize(&ctrl->ctrl_device);
+ ctrl->device = &ctrl->ctrl_device;
+ ctrl->device->devt = MKDEV(MAJOR(nvme_chr_devt), ctrl->instance);
+ ctrl->device->class = nvme_class;
+ ctrl->device->parent = ctrl->dev;
+ ctrl->device->groups = nvme_dev_attr_groups;
+ ctrl->device->release = nvme_free_ctrl;
+ dev_set_drvdata(ctrl->device, ctrl);
+ ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
+ if (ret)
goto out_release_instance;
- }
- get_device(ctrl->device);
- ida_init(&ctrl->ns_ida);
- spin_lock(&dev_list_lock);
- list_add_tail(&ctrl->node, &nvme_ctrl_list);
- spin_unlock(&dev_list_lock);
+ cdev_init(&ctrl->cdev, &nvme_dev_fops);
+ ctrl->cdev.owner = ops->module;
+ ret = cdev_device_add(&ctrl->cdev, ctrl->device);
+ if (ret)
+ goto out_free_name;
+
+ ida_init(&ctrl->ns_ida);
/*
* Initialize latency tolerance controls. The sysfs files won't
@@ -2834,8 +2779,10 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
return 0;
+out_free_name:
+ kfree_const(dev->kobj.name);
out_release_instance:
- nvme_release_instance(ctrl);
+ ida_simple_remove(&nvme_instance_ida, ctrl->instance);
out:
return ret;
}
@@ -2944,6 +2891,16 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
+int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
+{
+ if (!ctrl->ops->reinit_request)
+ return 0;
+
+ return blk_mq_tagset_iter(set, set->driver_data,
+ ctrl->ops->reinit_request);
+}
+EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
+
int __init nvme_core_init(void)
{
int result;
@@ -2953,12 +2910,9 @@ int __init nvme_core_init(void)
if (!nvme_wq)
return -ENOMEM;
- result = __register_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme",
- &nvme_dev_fops);
+ result = alloc_chrdev_region(&nvme_chr_devt, 0, NVME_MINORS, "nvme");
if (result < 0)
goto destroy_wq;
- else if (result > 0)
- nvme_char_major = result;
nvme_class = class_create(THIS_MODULE, "nvme");
if (IS_ERR(nvme_class)) {
@@ -2969,7 +2923,7 @@ int __init nvme_core_init(void)
return 0;
unregister_chrdev:
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
destroy_wq:
destroy_workqueue(nvme_wq);
return result;
@@ -2978,7 +2932,7 @@ destroy_wq:
void nvme_core_exit(void)
{
class_destroy(nvme_class);
- __unregister_chrdev(nvme_char_major, 0, NVME_MINORS, "nvme");
+ unregister_chrdev_region(nvme_chr_devt, NVME_MINORS);
destroy_workqueue(nvme_wq);
}
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 555c976cc2ee..a4967de5bb25 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -548,6 +548,7 @@ static const match_table_t opt_tokens = {
{ NVMF_OPT_HOSTNQN, "hostnqn=%s" },
{ NVMF_OPT_HOST_TRADDR, "host_traddr=%s" },
{ NVMF_OPT_HOST_ID, "hostid=%s" },
+ { NVMF_OPT_DUP_CONNECT, "duplicate_connect" },
{ NVMF_OPT_ERR, NULL }
};
@@ -566,6 +567,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
opts->kato = NVME_DEFAULT_KATO;
+ opts->duplicate_connect = false;
options = o = kstrdup(buf, GFP_KERNEL);
if (!options)
@@ -742,6 +744,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
goto out;
}
break;
+ case NVMF_OPT_DUP_CONNECT:
+ opts->duplicate_connect = true;
+ break;
default:
pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n",
p);
@@ -823,7 +828,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options);
#define NVMF_REQUIRED_OPTS (NVMF_OPT_TRANSPORT | NVMF_OPT_NQN)
#define NVMF_ALLOWED_OPTS (NVMF_OPT_QUEUE_SIZE | NVMF_OPT_NR_IO_QUEUES | \
NVMF_OPT_KATO | NVMF_OPT_HOSTNQN | \
- NVMF_OPT_HOST_ID)
+ NVMF_OPT_HOST_ID | NVMF_OPT_DUP_CONNECT)
static struct nvme_ctrl *
nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
@@ -841,6 +846,9 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
if (ret)
goto out_free_opts;
+
+ request_module("nvme-%s", opts->transport);
+
/*
* Check the generic options first as we need a valid transport for
* the lookup below. Then clear the generic flags so that transport
@@ -879,7 +887,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count)
"controller returned incorrect NQN: \"%s\".\n",
ctrl->subnqn);
up_read(&nvmf_transports_rwsem);
- ctrl->ops->delete_ctrl(ctrl);
+ nvme_delete_ctrl_sync(ctrl);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index bf33663218cd..42232e731f19 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -57,6 +57,7 @@ enum {
NVMF_OPT_HOST_TRADDR = 1 << 10,
NVMF_OPT_CTRL_LOSS_TMO = 1 << 11,
NVMF_OPT_HOST_ID = 1 << 12,
+ NVMF_OPT_DUP_CONNECT = 1 << 13,
};
/**
@@ -96,6 +97,7 @@ struct nvmf_ctrl_options {
unsigned int nr_io_queues;
unsigned int reconnect_delay;
bool discovery_nqn;
+ bool duplicate_connect;
unsigned int kato;
struct nvmf_host *host;
int max_reconnects;
@@ -131,6 +133,18 @@ struct nvmf_transport_ops {
struct nvmf_ctrl_options *opts);
};
+static inline bool
+nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
+ struct nvmf_ctrl_options *opts)
+{
+ if (strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) ||
+ strcmp(opts->host->nqn, ctrl->opts->host->nqn) ||
+ memcmp(&opts->host->id, &ctrl->opts->host->id, sizeof(uuid_t)))
+ return false;
+
+ return true;
+}
+
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index af075e998944..113c30be7276 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -45,6 +45,8 @@ enum nvme_fc_queue_flags {
#define NVMEFC_QUEUE_DELAY 3 /* ms units */
+#define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */
+
struct nvme_fc_queue {
struct nvme_fc_ctrl *ctrl;
struct device *dev;
@@ -136,6 +138,7 @@ struct nvme_fc_rport {
struct nvme_fc_lport *lport;
spinlock_t lock;
struct kref ref;
+ unsigned long dev_loss_end;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
enum nvme_fcctrl_flags {
@@ -157,7 +160,6 @@ struct nvme_fc_ctrl {
struct blk_mq_tag_set admin_tag_set;
struct blk_mq_tag_set tag_set;
- struct work_struct delete_work;
struct delayed_work connect_work;
struct kref ref;
@@ -213,10 +215,16 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
+/*
+ * These items are short-term. They will eventually be moved into
+ * a generic FC class. See comments in module init.
+ */
+static struct class *fc_class;
+static struct device *fc_udev_device;
+
/* *********************** FC-NVME Port Management ************************ */
-static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
@@ -452,6 +460,171 @@ nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);
+/*
+ * TRADDR strings, per FC-NVME are fixed format:
+ * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
+ * udev event will only differ by prefix of what field is
+ * being specified:
+ * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
+ * 19 + 43 + null_fudge = 64 characters
+ */
+#define FCNVME_TRADDR_LENGTH 64
+
+static void
+nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
+ struct nvme_fc_rport *rport)
+{
+ char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/
+ char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/
+ char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };
+
+ if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
+ return;
+
+ snprintf(hostaddr, sizeof(hostaddr),
+ "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
+ lport->localport.node_name, lport->localport.port_name);
+ snprintf(tgtaddr, sizeof(tgtaddr),
+ "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
+ rport->remoteport.node_name, rport->remoteport.port_name);
+ kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
+}
+
+static void
+nvme_fc_free_rport(struct kref *ref)
+{
+ struct nvme_fc_rport *rport =
+ container_of(ref, struct nvme_fc_rport, ref);
+ struct nvme_fc_lport *lport =
+ localport_to_lport(rport->remoteport.localport);
+ unsigned long flags;
+
+ WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
+ WARN_ON(!list_empty(&rport->ctrl_list));
+
+ /* remove from lport list */
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+ list_del(&rport->endp_list);
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ /* let the LLDD know we've finished tearing it down */
+ lport->ops->remoteport_delete(&rport->remoteport);
+
+ ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
+
+ kfree(rport);
+
+ nvme_fc_lport_put(lport);
+}
+
+static void
+nvme_fc_rport_put(struct nvme_fc_rport *rport)
+{
+ kref_put(&rport->ref, nvme_fc_free_rport);
+}
+
+static int
+nvme_fc_rport_get(struct nvme_fc_rport *rport)
+{
+ return kref_get_unless_zero(&rport->ref);
+}
+
+static void
+nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
+{
+ switch (ctrl->ctrl.state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_RECONNECTING:
+ /*
+ * As all reconnects were suppressed, schedule a
+ * connect.
+ */
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: connectivity re-established. "
+ "Attempting reconnect\n", ctrl->cnum);
+
+ queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
+ break;
+
+ case NVME_CTRL_RESETTING:
+ /*
+ * Controller is already in the process of terminating the
+ * association. No need to do anything further. The reconnect
+ * step will naturally occur after the reset completes.
+ */
+ break;
+
+ default:
+ /* no action to take - let it delete */
+ break;
+ }
+}
+
+static struct nvme_fc_rport *
+nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
+ struct nvme_fc_port_info *pinfo)
+{
+ struct nvme_fc_rport *rport;
+ struct nvme_fc_ctrl *ctrl;
+ unsigned long flags;
+
+ spin_lock_irqsave(&nvme_fc_lock, flags);
+
+ list_for_each_entry(rport, &lport->endp_list, endp_list) {
+ if (rport->remoteport.node_name != pinfo->node_name ||
+ rport->remoteport.port_name != pinfo->port_name)
+ continue;
+
+ if (!nvme_fc_rport_get(rport)) {
+ rport = ERR_PTR(-ENOLCK);
+ goto out_done;
+ }
+
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ spin_lock_irqsave(&rport->lock, flags);
+
+ /* has it been unregistered */
+ if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
+ /* means lldd called us twice */
+ spin_unlock_irqrestore(&rport->lock, flags);
+ nvme_fc_rport_put(rport);
+ return ERR_PTR(-ESTALE);
+ }
+
+ rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
+ rport->dev_loss_end = 0;
+
+ /*
+ * kick off a reconnect attempt on all associations to the
+ * remote port. A successful reconnects will resume i/o.
+ */
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
+ nvme_fc_resume_controller(ctrl);
+
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ return rport;
+ }
+
+ rport = NULL;
+
+out_done:
+ spin_unlock_irqrestore(&nvme_fc_lock, flags);
+
+ return rport;
+}
+
+static inline void
+__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
+ struct nvme_fc_port_info *pinfo)
+{
+ if (pinfo->dev_loss_tmo)
+ rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
+ else
+ rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
+}
+
/**
* nvme_fc_register_remoteport - transport entry point called by an
* LLDD to register the existence of a NVME
@@ -478,22 +651,45 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
unsigned long flags;
int ret, idx;
+ if (!nvme_fc_lport_get(lport)) {
+ ret = -ESHUTDOWN;
+ goto out_reghost_failed;
+ }
+
+ /*
+ * look to see if there is already a remoteport that is waiting
+ * for a reconnect (within dev_loss_tmo) with the same WWN's.
+ * If so, transition to it and reconnect.
+ */
+ newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);
+
+ /* found an rport, but something about its state is bad */
+ if (IS_ERR(newrec)) {
+ ret = PTR_ERR(newrec);
+ goto out_lport_put;
+
+ /* found existing rport, which was resumed */
+ } else if (newrec) {
+ nvme_fc_lport_put(lport);
+ __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
+ nvme_fc_signal_discovery_scan(lport, newrec);
+ *portptr = &newrec->remoteport;
+ return 0;
+ }
+
+ /* nothing found - allocate a new remoteport struct */
+
newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
GFP_KERNEL);
if (!newrec) {
ret = -ENOMEM;
- goto out_reghost_failed;
- }
-
- if (!nvme_fc_lport_get(lport)) {
- ret = -ESHUTDOWN;
- goto out_kfree_rport;
+ goto out_lport_put;
}
idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
- goto out_lport_put;
+ goto out_kfree_rport;
}
INIT_LIST_HEAD(&newrec->endp_list);
@@ -511,63 +707,27 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_id = pinfo->port_id;
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx;
+ __nvme_fc_set_dev_loss_tmo(newrec, pinfo);
spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);
+ nvme_fc_signal_discovery_scan(lport, newrec);
+
*portptr = &newrec->remoteport;
return 0;
-out_lport_put:
- nvme_fc_lport_put(lport);
out_kfree_rport:
kfree(newrec);
+out_lport_put:
+ nvme_fc_lport_put(lport);
out_reghost_failed:
*portptr = NULL;
return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
-static void
-nvme_fc_free_rport(struct kref *ref)
-{
- struct nvme_fc_rport *rport =
- container_of(ref, struct nvme_fc_rport, ref);
- struct nvme_fc_lport *lport =
- localport_to_lport(rport->remoteport.localport);
- unsigned long flags;
-
- WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
- WARN_ON(!list_empty(&rport->ctrl_list));
-
- /* remove from lport list */
- spin_lock_irqsave(&nvme_fc_lock, flags);
- list_del(&rport->endp_list);
- spin_unlock_irqrestore(&nvme_fc_lock, flags);
-
- /* let the LLDD know we've finished tearing it down */
- lport->ops->remoteport_delete(&rport->remoteport);
-
- ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);
-
- kfree(rport);
-
- nvme_fc_lport_put(lport);
-}
-
-static void
-nvme_fc_rport_put(struct nvme_fc_rport *rport)
-{
- kref_put(&rport->ref, nvme_fc_free_rport);
-}
-
-static int
-nvme_fc_rport_get(struct nvme_fc_rport *rport)
-{
- return kref_get_unless_zero(&rport->ref);
-}
-
static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
@@ -592,6 +752,58 @@ restart:
return 0;
}
+static void
+nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
+{
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: controller connectivity lost. Awaiting "
+ "Reconnect", ctrl->cnum);
+
+ switch (ctrl->ctrl.state) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ /*
+ * Schedule a controller reset. The reset will terminate the
+ * association and schedule the reconnect timer. Reconnects
+ * will be attempted until either the ctlr_loss_tmo
+ * (max_retries * connect_delay) expires or the remoteport's
+ * dev_loss_tmo expires.
+ */
+ if (nvme_reset_ctrl(&ctrl->ctrl)) {
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: Couldn't schedule reset. "
+ "Deleting controller.\n",
+ ctrl->cnum);
+ nvme_delete_ctrl(&ctrl->ctrl);
+ }
+ break;
+
+ case NVME_CTRL_RECONNECTING:
+ /*
+ * The association has already been terminated and the
+ * controller is attempting reconnects. No need to do anything
+ * futher. Reconnects will be attempted until either the
+ * ctlr_loss_tmo (max_retries * connect_delay) expires or the
+ * remoteport's dev_loss_tmo expires.
+ */
+ break;
+
+ case NVME_CTRL_RESETTING:
+ /*
+ * Controller is already in the process of terminating the
+ * association. No need to do anything further. The reconnect
+ * step will kick in naturally after the association is
+ * terminated.
+ */
+ break;
+
+ case NVME_CTRL_DELETING:
+ default:
+ /* no action to take - let it delete */
+ break;
+ }
+}
+
/**
* nvme_fc_unregister_remoteport - transport entry point called by an
* LLDD to deregister/remove a previously
@@ -621,19 +833,75 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
}
portptr->port_state = FC_OBJSTATE_DELETED;
- /* tear down all associations to the remote port */
- list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
- __nvme_fc_del_ctrl(ctrl);
+ rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);
+
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+ /* if dev_loss_tmo==0, dev loss is immediate */
+ if (!portptr->dev_loss_tmo) {
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: controller connectivity lost. "
+ "Deleting controller.\n",
+ ctrl->cnum);
+ nvme_delete_ctrl(&ctrl->ctrl);
+ } else
+ nvme_fc_ctrl_connectivity_loss(ctrl);
+ }
spin_unlock_irqrestore(&rport->lock, flags);
nvme_fc_abort_lsops(rport);
+ /*
+ * release the reference, which will allow, if all controllers
+ * go away, which should only occur after dev_loss_tmo occurs,
+ * for the rport to be torn down.
+ */
nvme_fc_rport_put(rport);
+
return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
+/**
+ * nvme_fc_rescan_remoteport - transport entry point called by an
+ * LLDD to request a nvme device rescan.
+ * @remoteport: pointer to the (registered) remote port that is to be
+ * rescanned.
+ *
+ * Returns: N/A
+ */
+void
+nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
+{
+ struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);
+
+ nvme_fc_signal_discovery_scan(rport->lport, rport);
+}
+EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);
+
+int
+nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
+ u32 dev_loss_tmo)
+{
+ struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rport->lock, flags);
+
+ if (portptr->port_state != FC_OBJSTATE_ONLINE) {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ return -EINVAL;
+ }
+
+ /* a dev_loss_tmo of 0 (immediate) i