summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 11:05:47 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-01-31 11:05:47 -0800
commit: 0be600a5add76e8e8b9e1119f2a7426ff849aca8 (patch)
tree: d5fcc2b119f03143f9bed1b9aa5cb85458c8bd03 /drivers/md
parent: 040639b7fcf73ee39c15d38257f652a2048e96f2 (diff)
parent: 9614e2ba9161c7f5419f4212fa6057d2a65f6ae6 (diff)
Merge tag 'for-4.16/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - DM core fixes to ensure that bio submission follows a depth-first tree walk; this is critical to allow forward progress without the need to use the bioset's BIOSET_NEED_RESCUER.

 - Remove DM core's BIOSET_NEED_RESCUER based dm_offload infrastructure.

 - DM core cleanups and improvements to make bio-based DM more efficient (e.g. reduced memory footprint as well as leveraging per-bio-data more).

 - Introduce new bio-based mode (DM_TYPE_NVME_BIO_BASED) that leverages the more direct IO submission path in the block layer; this mode is used by DM multipath and also optimizes targets like DM thin-pool that stack directly on an NVMe data device.

 - DM multipath improvements to factor out legacy SCSI-only (e.g. scsi_dh) code paths to allow for more optimized support for NVMe multipath.

 - A fix for DM multipath path selectors (service-time and queue-length) to select paths in a more balanced way; largely academic but doesn't hurt.

 - Numerous DM raid target fixes and improvements.

 - Add a new DM "unstriped" target that enables Intel to work around firmware limitations in some NVMe drives that are striped internally (this target also works when stacked above the DM "striped" target).

 - Various Documentation fixes and improvements.

 - Misc cleanups and fixes across various DM infrastructure and targets (e.g. bufio, flakey, log-writes, snapshot).
* tag 'for-4.16/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (69 commits)
  dm cache: Documentation: update default migration_throttling value
  dm mpath selector: more evenly distribute ties
  dm unstripe: fix target length versus number of stripes size check
  dm thin: fix trailing semicolon in __remap_and_issue_shared_cell
  dm table: fix NVMe bio-based dm_table_determine_type() validation
  dm: various cleanups to md->queue initialization code
  dm mpath: delay the retry of a request if the target responded as busy
  dm mpath: return DM_MAPIO_DELAY_REQUEUE if QUEUE_IO or PG_INIT_REQUIRED
  dm mpath: return DM_MAPIO_REQUEUE on blk-mq rq allocation failure
  dm log writes: fix max length used for kstrndup
  dm: backfill missing calls to mutex_destroy()
  dm snapshot: use mutex instead of rw_semaphore
  dm flakey: check for null arg_name in parse_features()
  dm thin: extend thinpool status format string with omitted fields
  dm thin: fixes in thin-provisioning.txt
  dm thin: document representation of <highest mapped sector> when there is none
  dm thin: fix documentation relative to low water mark threshold
  dm cache: be consistent in specifying sectors and SI units in cache.txt
  dm cache: delete obsoleted paragraph in cache.txt
  dm cache: fix grammar in cache-policies.txt
  ...
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/Kconfig | 7
-rw-r--r-- drivers/md/Makefile | 1
-rw-r--r-- drivers/md/dm-bufio.c | 37
-rw-r--r-- drivers/md/dm-core.h | 5
-rw-r--r-- drivers/md/dm-crypt.c | 5
-rw-r--r-- drivers/md/dm-delay.c | 2
-rw-r--r-- drivers/md/dm-flakey.c | 5
-rw-r--r-- drivers/md/dm-io.c | 3
-rw-r--r-- drivers/md/dm-kcopyd.c | 6
-rw-r--r-- drivers/md/dm-log-writes.c | 2
-rw-r--r-- drivers/md/dm-mpath.c | 297
-rw-r--r-- drivers/md/dm-queue-length.c | 6
-rw-r--r-- drivers/md/dm-raid.c | 380
-rw-r--r-- drivers/md/dm-rq.c | 6
-rw-r--r-- drivers/md/dm-service-time.c | 6
-rw-r--r-- drivers/md/dm-snap.c | 84
-rw-r--r-- drivers/md/dm-stats.c | 1
-rw-r--r-- drivers/md/dm-table.c | 114
-rw-r--r-- drivers/md/dm-thin.c | 9
-rw-r--r-- drivers/md/dm-unstripe.c | 219
-rw-r--r-- drivers/md/dm-zoned-metadata.c | 3
-rw-r--r-- drivers/md/dm-zoned-target.c | 3
-rw-r--r-- drivers/md/dm.c | 659
-rw-r--r-- drivers/md/dm.h | 4
24 files changed, 1252 insertions, 612 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 83b9362be09c..2c8ac3688815 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -269,6 +269,13 @@ config DM_BIO_PRISON
source "drivers/md/persistent-data/Kconfig"
+config DM_UNSTRIPED
+ tristate "Unstriped target"
+ depends on BLK_DEV_DM
+ ---help---
+ Unstripes I/O so it is issued solely on a single drive in a HW
+ RAID0 or dm-striped target.
+
config DM_CRYPT
tristate "Crypt target support"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index f701bb211783..63255f3ebd97 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -43,6 +43,7 @@ obj-$(CONFIG_BCACHE) += bcache/
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_BLK_DEV_DM_BUILTIN) += dm-builtin.o
+obj-$(CONFIG_DM_UNSTRIPED) += dm-unstripe.o
obj-$(CONFIG_DM_BUFIO) += dm-bufio.o
obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index c546b567f3b5..414c9af54ded 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -662,7 +662,7 @@ static void submit_io(struct dm_buffer *b, int rw, bio_end_io_t *end_io)
sector = (b->block << b->c->sectors_per_block_bits) + b->c->start;
- if (rw != WRITE) {
+ if (rw != REQ_OP_WRITE) {
n_sectors = 1 << b->c->sectors_per_block_bits;
offset = 0;
} else {
@@ -740,7 +740,7 @@ static void __write_dirty_buffer(struct dm_buffer *b,
b->write_end = b->dirty_end;
if (!write_list)
- submit_io(b, WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, write_endio);
else
list_add_tail(&b->write_list, write_list);
}
@@ -753,7 +753,7 @@ static void __flush_write_list(struct list_head *write_list)
struct dm_buffer *b =
list_entry(write_list->next, struct dm_buffer, write_list);
list_del(&b->write_list);
- submit_io(b, WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, write_endio);
cond_resched();
}
blk_finish_plug(&plug);
@@ -1123,7 +1123,7 @@ static void *new_read(struct dm_bufio_client *c, sector_t block,
return NULL;
if (need_submit)
- submit_io(b, READ, read_endio);
+ submit_io(b, REQ_OP_READ, read_endio);
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
@@ -1193,7 +1193,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
dm_bufio_unlock(c);
if (need_submit)
- submit_io(b, READ, read_endio);
+ submit_io(b, REQ_OP_READ, read_endio);
dm_bufio_release(b);
cond_resched();
@@ -1454,7 +1454,7 @@ retry:
old_block = b->block;
__unlink_buffer(b);
__link_buffer(b, new_block, b->list_mode);
- submit_io(b, WRITE, write_endio);
+ submit_io(b, REQ_OP_WRITE, write_endio);
wait_on_bit_io(&b->state, B_WRITING,
TASK_UNINTERRUPTIBLE);
__unlink_buffer(b);
@@ -1716,7 +1716,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
if (!DM_BUFIO_CACHE_NAME(c)) {
r = -ENOMEM;
mutex_unlock(&dm_bufio_clients_lock);
- goto bad_cache;
+ goto bad;
}
}
@@ -1727,7 +1727,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
if (!DM_BUFIO_CACHE(c)) {
r = -ENOMEM;
mutex_unlock(&dm_bufio_clients_lock);
- goto bad_cache;
+ goto bad;
}
}
}
@@ -1738,27 +1738,28 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
if (!b) {
r = -ENOMEM;
- goto bad_buffer;
+ goto bad;
}
__free_buffer_wake(b);
}
+ c->shrinker.count_objects = dm_bufio_shrink_count;
+ c->shrinker.scan_objects = dm_bufio_shrink_scan;
+ c->shrinker.seeks = 1;
+ c->shrinker.batch = 0;
+ r = register_shrinker(&c->shrinker);
+ if (r)
+ goto bad;
+
mutex_lock(&dm_bufio_clients_lock);
dm_bufio_client_count++;
list_add(&c->client_list, &dm_bufio_all_clients);
__cache_size_refresh();
mutex_unlock(&dm_bufio_clients_lock);
- c->shrinker.count_objects = dm_bufio_shrink_count;
- c->shrinker.scan_objects = dm_bufio_shrink_scan;
- c->shrinker.seeks = 1;
- c->shrinker.batch = 0;
- register_shrinker(&c->shrinker);
-
return c;
-bad_buffer:
-bad_cache:
+bad:
while (!list_empty(&c->reserved_buffers)) {
struct dm_buffer *b = list_entry(c->reserved_buffers.next,
struct dm_buffer, lru_list);
@@ -1767,6 +1768,7 @@ bad_cache:
}
dm_io_client_destroy(c->dm_io);
bad_dm_io:
+ mutex_destroy(&c->lock);
kfree(c);
bad_client:
return ERR_PTR(r);
@@ -1811,6 +1813,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
BUG_ON(c->n_buffers[i]);
dm_io_client_destroy(c->dm_io);
+ mutex_destroy(&c->lock);
kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 6a14f945783c..3222e21cbbf8 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -91,8 +91,7 @@ struct mapped_device {
/*
* io objects are allocated from here.
*/
- mempool_t *io_pool;
-
+ struct bio_set *io_bs;
struct bio_set *bs;
/*
@@ -130,8 +129,6 @@ struct mapped_device {
struct srcu_struct io_barrier;
};
-void dm_init_md_queue(struct mapped_device *md);
-void dm_init_normal_md_queue(struct mapped_device *md);
int md_in_flight(struct mapped_device *md);
void disable_write_same(struct mapped_device *md);
void disable_write_zeroes(struct mapped_device *md);
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 2ad429100d25..8168f737590e 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -2193,6 +2193,8 @@ static void crypt_dtr(struct dm_target *ti)
kzfree(cc->cipher_auth);
kzfree(cc->authenc_key);
+ mutex_destroy(&cc->bio_alloc_lock);
+
/* Must zero key material before freeing */
kzfree(cc);
}
@@ -2702,8 +2704,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- cc->bs = bioset_create(MIN_IOS, 0, (BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER));
+ cc->bs = bioset_create(MIN_IOS, 0, BIOSET_NEED_BVECS);
if (!cc->bs) {
ti->error = "Cannot allocate crypt bioset";
goto bad;
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 288386bfbfb5..1783d80c9cad 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -229,6 +229,8 @@ static void delay_dtr(struct dm_target *ti)
if (dc->dev_write)
dm_put_device(ti, dc->dev_write);
+ mutex_destroy(&dc->timer_lock);
+
kfree(dc);
}
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index b82cb1ab1eaa..1b907b15f5c3 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -70,6 +70,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
arg_name = dm_shift_arg(as);
argc--;
+ if (!arg_name) {
+ ti->error = "Insufficient feature arguments";
+ return -EINVAL;
+ }
+
/*
* drop_writes
*/
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index b4357ed4d541..a8d914d5abbe 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -58,8 +58,7 @@ struct dm_io_client *dm_io_client_create(void)
if (!client->pool)
goto bad;
- client->bios = bioset_create(min_ios, 0, (BIOSET_NEED_BVECS |
- BIOSET_NEED_RESCUER));
+ client->bios = bioset_create(min_ios, 0, BIOSET_NEED_BVECS);
if (!client->bios)
goto bad;
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c
index eb45cc3df31d..e6e7c686646d 100644
--- a/drivers/md/dm-kcopyd.c
+++ b/drivers/md/dm-kcopyd.c
@@ -477,8 +477,10 @@ static int run_complete_job(struct kcopyd_job *job)
* If this is the master job, the sub jobs have already
* completed so we can free everything.
*/
- if (job->master_job == job)
+ if (job->master_job == job) {
+ mutex_destroy(&job->lock);
mempool_free(job, kc->job_pool);
+ }
fn(read_err, write_err, context);
if (atomic_dec_and_test(&kc->nr_jobs))
@@ -750,6 +752,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
* followed by SPLIT_COUNT sub jobs.
*/
job = mempool_alloc(kc->job_pool, GFP_NOIO);
+ mutex_init(&job->lock);
/*
* set up for the read.
@@ -811,7 +814,6 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
if (job->source.count <= SUB_JOB_SIZE)
dispatch_job(job);
else {
- mutex_init(&job->lock);
job->progress = 0;
split_job(job);
}
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 189badbeddaf..3362d866793b 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -594,7 +594,7 @@ static int log_mark(struct log_writes_c *lc, char *data)
return -ENOMEM;
}
- block->data = kstrndup(data, maxsize, GFP_KERNEL);
+ block->data = kstrndup(data, maxsize - 1, GFP_KERNEL);
if (!block->data) {
DMERR("Error copying mark data");
kfree(block);
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ef57c6d1c887..7d3e572072f5 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -64,36 +64,30 @@ struct priority_group {
/* Multipath context */
struct multipath {
- struct list_head list;
- struct dm_target *ti;
-
- const char *hw_handler_name;
- char *hw_handler_params;
+ unsigned long flags; /* Multipath state flags */
spinlock_t lock;
-
- unsigned nr_priority_groups;
- struct list_head priority_groups;
-
- wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+ enum dm_queue_mode queue_mode;
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
- unsigned long flags; /* Multipath state flags */
+ atomic_t nr_valid_paths; /* Total number of usable paths */
+ unsigned nr_priority_groups;
+ struct list_head priority_groups;
+ const char *hw_handler_name;
+ char *hw_handler_params;
+ wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
-
- atomic_t nr_valid_paths; /* Total number of usable paths */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
atomic_t pg_init_count; /* Number of times pg_init called */
- enum dm_queue_mode queue_mode;
-
struct mutex work_mutex;
struct work_struct trigger_event;
+ struct dm_target *ti;
struct work_struct process_queued_bios;
struct bio_list queued_bios;
@@ -135,10 +129,10 @@ static struct pgpath *alloc_pgpath(void)
{
struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
- if (pgpath) {
- pgpath->is_active = true;
- INIT_DELAYED_WORK(&pgpath->activate_path, activate_path_work);
- }
+ if (!pgpath)
+ return NULL;
+
+ pgpath->is_active = true;
return pgpath;
}
@@ -193,13 +187,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
if (m) {
INIT_LIST_HEAD(&m->priority_groups);
spin_lock_init(&m->lock);
- set_bit(MPATHF_QUEUE_IO, &m->flags);
atomic_set(&m->nr_valid_paths, 0);
- atomic_set(&m->pg_init_in_progress, 0);
- atomic_set(&m->pg_init_count, 0);
- m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
INIT_WORK(&m->trigger_event, trigger_event);
- init_waitqueue_head(&m->pg_init_wait);
mutex_init(&m->work_mutex);
m->queue_mode = DM_TYPE_NONE;
@@ -221,13 +210,26 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
- } else if (m->queue_mode == DM_TYPE_BIO_BASED) {
+
+ } else if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
- /*
- * bio-based doesn't support any direct scsi_dh management;
- * it just discovers if a scsi_dh is attached.
- */
- set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+
+ if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ /*
+ * bio-based doesn't support any direct scsi_dh management;
+ * it just discovers if a scsi_dh is attached.
+ */
+ set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
+ }
+ }
+
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ set_bit(MPATHF_QUEUE_IO, &m->flags);
+ atomic_set(&m->pg_init_in_progress, 0);
+ atomic_set(&m->pg_init_count, 0);
+ m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
+ init_waitqueue_head(&m->pg_init_wait);
}
dm_table_set_type(ti->table, m->queue_mode);
@@ -246,6 +248,7 @@ static void free_multipath(struct multipath *m)
kfree(m->hw_handler_name);
kfree(m->hw_handler_params);
+ mutex_destroy(&m->work_mutex);
kfree(m);
}
@@ -264,29 +267,23 @@ static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio)
return dm_per_bio_data(bio, multipath_per_bio_data_size());
}
-static struct dm_bio_details *get_bio_details_from_bio(struct bio *bio)
+static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio)
{
/* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */
- struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
void *bio_details = mpio + 1;
-
return bio_details;
}
-static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p,
- struct dm_bio_details **bio_details_p)
+static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p)
{
struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
- struct dm_bio_details *bio_details = get_bio_details_from_bio(bio);
+ struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio);
- memset(mpio, 0, sizeof(*mpio));
- memset(bio_details, 0, sizeof(*bio_details));
- dm_bio_record(bio_details, bio);
+ mpio->nr_bytes = bio->bi_iter.bi_size;
+ mpio->pgpath = NULL;
+ *mpio_p = mpio;
- if (mpio_p)
- *mpio_p = mpio;
- if (bio_details_p)
- *bio_details_p = bio_details;
+ dm_bio_record(bio_details, bio);
}
/*-----------------------------------------------
@@ -340,6 +337,9 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
+ if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ return;
+
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
@@ -385,7 +385,8 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
- clear_bit(MPATHF_QUEUE_IO, &m->flags);
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
+ clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
@@ -516,12 +517,10 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
return DM_MAPIO_KILL;
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
- if (pg_init_all_paths(m))
- return DM_MAPIO_DELAY_REQUEUE;
- return DM_MAPIO_REQUEUE;
+ pg_init_all_paths(m);
+ return DM_MAPIO_DELAY_REQUEUE;
}
- memset(mpio, 0, sizeof(*mpio));
mpio->pgpath = pgpath;
mpio->nr_bytes = nr_bytes;
@@ -530,12 +529,23 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
if (IS_ERR(clone)) {
/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
- bool queue_dying = blk_queue_dying(q);
- if (queue_dying) {
+ if (blk_queue_dying(q)) {
atomic_inc(&m->pg_init_in_progress);
activate_or_offline_path(pgpath);
+ return DM_MAPIO_DELAY_REQUEUE;
}
- return DM_MAPIO_DELAY_REQUEUE;
+
+ /*
+ * blk-mq's SCHED_RESTART can cover this requeue, so we
+ * needn't deal with it by DELAY_REQUEUE. More importantly,
+ * we have to return DM_MAPIO_REQUEUE so that blk-mq can
+ * get the queue busy feedback (via BLK_STS_RESOURCE),
+ * otherwise I/O merging can suffer.
+ */
+ if (q->mq_ops)
+ return DM_MAPIO_REQUEUE;
+ else
+ return DM_MAPIO_DELAY_REQUEUE;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
@@ -557,9 +567,9 @@ static void multipath_release_clone(struct request *clone)
/*
* Map cloned bios (bio-based multipath)
*/
-static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_mpath_io *mpio)
+
+static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
- size_t nr_bytes = bio->bi_iter.bi_size;
struct pgpath *pgpath;
unsigned long flags;
bool queue_io;
@@ -568,7 +578,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
pgpath = READ_ONCE(m->current_pgpath);
queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags);
if (!pgpath || !queue_io)
- pgpath = choose_pgpath(m, nr_bytes);
+ pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if ((pgpath && queue_io) ||
(!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) {
@@ -576,14 +586,62 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
spin_lock_irqsave(&m->lock, flags);
bio_list_add(&m->queued_bios, bio);
spin_unlock_irqrestore(&m->lock, flags);
+
/* PG_INIT_REQUIRED cannot be set without QUEUE_IO */
if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
pg_init_all_paths(m);
else if (!queue_io)
queue_work(kmultipathd, &m->process_queued_bios);
- return DM_MAPIO_SUBMITTED;
+
+ return ERR_PTR(-EAGAIN);
}
+ return pgpath;
+}
+
+static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
+{
+ struct pgpath *pgpath;
+ unsigned long flags;
+
+ /* Do we need to select a new pgpath? */
+ /*
+ * FIXME: currently only switching path if no path (due to failure, etc)
+ * - which negates the point of using a path selector
+ */
+ pgpath = READ_ONCE(m->current_pgpath);
+ if (!pgpath)
+ pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
+
+ if (!pgpath) {
+ if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
+ /* Queue for the daemon to resubmit */
+ spin_lock_irqsave(&m->lock, flags);
+ bio_list_add(&m->queued_bios, bio);
+ spin_unlock_irqrestore(&m->lock, flags);
+ queue_work(kmultipathd, &m->process_queued_bios);
+
+ return ERR_PTR(-EAGAIN);
+ }
+ return NULL;
+ }
+
+ return pgpath;
+}
+
+static int __multipath_map_bio(struct multipath *m, struct bio *bio,
+ struct dm_mpath_io *mpio)
+{
+ struct pgpath *pgpath;
+
+ if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
+ pgpath = __map_bio_nvme(m, bio);
+ else
+ pgpath = __map_bio(m, bio);
+
+ if (IS_ERR(pgpath))
+ return DM_MAPIO_SUBMITTED;
+
if (!pgpath) {
if (must_push_back_bio(m))
return DM_MAPIO_REQUEUE;
@@ -592,7 +650,6 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
}
mpio->pgpath = pgpath;
- mpio->nr_bytes = nr_bytes;
bio->bi_status = 0;
bio_set_dev(bio, pgpath->path.dev->bdev);
@@ -601,7 +658,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m
if (pgpath->pg->ps.type->start_io)
pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
&pgpath->path,
- nr_bytes);
+ mpio->nr_bytes);
return DM_MAPIO_REMAPPED;
}
@@ -610,8 +667,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = NULL;
- multipath_init_per_bio_data(bio, &mpio, NULL);
-
+ multipath_init_per_bio_data(bio, &mpio);
return __multipath_map_bio(m, bio, mpio);
}
@@ -619,7 +675,8 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
- else if (m->queue_mode == DM_TYPE_BIO_BASED)
+ else if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -649,7 +706,9 @@ static void process_queued_bios(struct work_struct *work)
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
- r = __multipath_map_bio(m, bio, get_mpio_from_bio(bio));
+ struct dm_mpath_io *mpio = get_mpio_from_bio(bio);
+ dm_bio_restore(get_bio_details_from_mpio(mpio), bio);
+ r = __multipath_map_bio(m, bio, mpio);
switch (r) {
case DM_MAPIO_KILL:
bio->bi_status = BLK_STS_IOERR;
@@ -752,34 +811,11 @@ static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
return 0;
}
-static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
- struct dm_target *ti)
+static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **error)
{
- int r;
- struct pgpath *p;
- struct multipath *m = ti->private;
- struct request_queue *q = NULL;
+ struct request_queue *q = bdev_get_queue(bdev);
const char *attached_handler_name;
-
- /* we need at least a path arg */
- if (as->argc < 1) {
- ti->error = "no device given";
- return ERR_PTR(-EINVAL);
- }
-
- p = alloc_pgpath();
- if (!p)
- return ERR_PTR(-ENOMEM);
-
- r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
- &p->path.dev);
- if (r) {
- ti->error = "error getting device";
- goto bad;
- }
-
- if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) || m->hw_handler_name)
- q = bdev_get_queue(p->path.dev->bdev);
+ int r;
if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) {
retain:
@@ -811,26 +847,59 @@ retain:
char b[BDEVNAME_SIZE];
printk(KERN_INFO "dm-mpath: retaining handler on device %s\n",
- bdevname(p->path.dev->bdev, b));
+ bdevname(bdev, b));
goto retain;
}
if (r < 0) {
- ti->error = "error attaching hardware handler";
- dm_put_device(ti, p->path.dev);
- goto bad;
+ *error = "error attaching hardware handler";
+ return r;
}
if (m->hw_handler_params) {
r = scsi_dh_set_params(q, m->hw_handler_params);
if (r < 0) {
- ti->error = "unable to set hardware "
- "handler parameters";
- dm_put_device(ti, p->path.dev);
- goto bad;
+ *error = "unable to set hardware handler parameters";
+ return r;
}
}
}
+ return 0;
+}
+
+static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
+ struct dm_target *ti)
+{
+ int r;
+ struct pgpath *p;
+ struct multipath *m = ti->private;
+
+ /* we need at least a path arg */
+ if (as->argc < 1) {
+ ti->error = "no device given";
+ return ERR_PTR(-EINVAL);
+ }
+
+ p = alloc_pgpath();
+ if (!p)
+ return ERR_PTR(-ENOMEM);
+
+ r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
+ &p->path.dev);
+ if (r) {
+ ti->error = "error getting device";
+ goto bad;
+ }
+
+ if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
+ INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
+ r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
+ if (r) {
+ dm_put_device(ti, p->path.dev);
+ goto bad;
+ }
+ }
+
r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
if (r) {
dm_put_device(ti, p->path.dev);
@@ -838,7 +907,6 @@ retain:
}
return p;
-
bad:
free_pgpath(p);
return ERR_PTR(r);
@@ -933,7 +1001,8 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
- if (m->queue_mode == DM_TYPE_BIO_BASED) {
+ if (m->queue_mode == DM_TYPE_BIO_BASED ||
+ m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
@@ -1022,6 +1091,8 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
+ else if (!strcasecmp(queue_mode_name, "nvme"))
+ m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
@@ -1122,7 +1193,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
- if (m->queue_mode == DM_TYPE_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
@@ -1151,16 +1222,19 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
- set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
- smp_mb__after_atomic();
+ if (m->hw_handler_name) {
+ set_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
+ smp_mb__after_atomic();
+
+ flush_workqueue(kmpath_handlerd);
+ multipath_wait_for_pg_init_completion(m);
+
+ clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
+ smp_mb__after_atomic();
+ }
- flush_workqueue(kmpath_handlerd);
- multipath_wait_for_pg_init_completion(m);
flush_workqueue(kmultipathd);
flush_work(&m->trigger_event);
-
- clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
- smp_mb__after_atomic();
}
static void multipath_dtr(struct dm_target *ti)
@@ -1496,7 +1570,10 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
if (error && blk_path_error(error)) {
struct multipath *m = ti->private;
- r = DM_ENDIO_REQUEUE;
+ if (error == BLK_STS_RESOURCE)
+ r = DM_ENDIO_DELAY_REQUEUE;
+ else
+ r = DM_ENDIO_REQUEUE;
if (pgpath)
fail_path(pgpath);
@@ -1521,7 +1598,7 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone,
}
static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
- blk_status_t *error)
+ blk_status_t *error)
{
struct multipath *m = ti->private;
struct dm_mpath_io *mpio = get_mpio_from_bio(clone);
@@ -1546,9 +1623,6 @@ static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone,
goto done;
}
- /* Queue for the daemon to resubmit */
- dm_bio_r