author    Linus Torvalds <torvalds@linux-foundation.org>  2016-10-09 17:16:18 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-10-09 17:16:18 -0700
commit    48915c2cbc77eceec2005afb695ac658fede4e0d (patch)
tree      534b680b5203c37741e261b630f9c3584e743c47 /drivers/md
parent    b9044ac8292fc94bee33f6f08acaed3ac55f0c75 (diff)
parent    8ff232c1a819c2e98d85974a3bff0b7b8e2970ed (diff)
Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:

 - various fixes and cleanups for request-based DM core

 - add support for delaying the requeue of requests; used by DM multipath
   when all paths have failed and 'queue_if_no_path' is enabled

 - DM cache improvements to speed up loading the metadata and writing the
   hint array

 - fix potential for a dm-crypt crash on device teardown

 - remove dm_bufio_cond_resched() and just use cond_resched()

 - change DM multipath to return a reservation conflict error immediately,
   rather than failing the path and retrying (potentially indefinitely)

* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm mpath: always return reservation conflict without failing over
  dm bufio: remove dm_bufio_cond_resched()
  dm crypt: fix crash on exit
  dm cache metadata: switch to using the new cursor api for loading metadata
  dm array: introduce cursor api
  dm btree: introduce cursor api
  dm cache policy smq: distribute entries to random levels when switching to smq
  dm cache: speed up writing of the hint array
  dm array: add dm_array_new()
  dm mpath: delay the requeue of blk-mq requests while all paths down
  dm mpath: use dm_mq_kick_requeue_list()
  dm rq: introduce dm_mq_kick_requeue_list()
  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  dm: convert wait loops to use autoremove_wake_function()
  dm: use signal_pending_state() in dm_wait_for_completion()
  dm: rename task state function arguments
  dm: add two lockdep_assert_held() statements
  dm rq: simplify dm_old_stop_queue()
  dm mpath: check if path's request_queue is dying in activate_path()
  ...
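The reservation-conflict change is the easiest to see in isolation: -EBADE is now classified as an error that retrying on another path cannot fix, so dm-mpath returns it to the caller instead of failing the path and failing over. A minimal sketch of that classification, with a hypothetical function name and an abbreviated case list; the real logic is in noretry_error() in the dm-mpath.c hunks below:

/*
 * Illustrative sketch only (abbreviated): errors for which switching
 * paths cannot help.  -EBADE (reservation conflict) is now one of them,
 * so it is passed straight back to the caller.
 */
static bool error_needs_no_retry(int error)
{
	switch (error) {
	case -EBADE:		/* reservation conflict: don't fail the path */
	case -EOPNOTSUPP:
	case -EREMOTEIO:
	case -EILSEQ:
		return true;
	default:
		return false;
	}
}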
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-bufio.c                    31
-rw-r--r--  drivers/md/dm-cache-metadata.c          183
-rw-r--r--  drivers/md/dm-cache-policy-cleaner.c      2
-rw-r--r--  drivers/md/dm-cache-policy-internal.h     6
-rw-r--r--  drivers/md/dm-cache-policy-smq.c         45
-rw-r--r--  drivers/md/dm-cache-policy.h             10
-rw-r--r--  drivers/md/dm-crypt.c                    24
-rw-r--r--  drivers/md/dm-mpath.c                    59
-rw-r--r--  drivers/md/dm-rq.c                      104
-rw-r--r--  drivers/md/dm-rq.h                        2
-rw-r--r--  drivers/md/dm.c                          37
-rw-r--r--  drivers/md/persistent-data/dm-array.c   228
-rw-r--r--  drivers/md/persistent-data/dm-array.h    52
-rw-r--r--  drivers/md/persistent-data/dm-btree.c   162
-rw-r--r--  drivers/md/persistent-data/dm-btree.h    35
15 files changed, 715 insertions, 265 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 8625040bae92..125aedc3875f 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -191,19 +191,6 @@ static void dm_bufio_unlock(struct dm_bufio_client *c)
mutex_unlock(&c->lock);
}
-/*
- * FIXME Move to sched.h?
- */
-#ifdef CONFIG_PREEMPT_VOLUNTARY
-# define dm_bufio_cond_resched() \
-do { \
- if (unlikely(need_resched())) \
- _cond_resched(); \
-} while (0)
-#else
-# define dm_bufio_cond_resched() do { } while (0)
-#endif
-
/*----------------------------------------------------------------*/
/*
@@ -741,7 +728,7 @@ static void __flush_write_list(struct list_head *write_list)
list_entry(write_list->next, struct dm_buffer, write_list);
list_del(&b->write_list);
submit_io(b, WRITE, b->block, write_endio);
- dm_bufio_cond_resched();
+ cond_resched();
}
blk_finish_plug(&plug);
}
@@ -780,7 +767,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
__unlink_buffer(b);
return b;
}
- dm_bufio_cond_resched();
+ cond_resched();
}
list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
@@ -791,7 +778,7 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
__unlink_buffer(b);
return b;
}
- dm_bufio_cond_resched();
+ cond_resched();
}
return NULL;
@@ -923,7 +910,7 @@ static void __write_dirty_buffers_async(struct dm_bufio_client *c, int no_wait,
return;
__write_dirty_buffer(b, write_list);
- dm_bufio_cond_resched();
+ cond_resched();
}
}
@@ -973,7 +960,7 @@ static void __check_watermark(struct dm_bufio_client *c,
return;
__free_buffer_wake(b);
- dm_bufio_cond_resched();
+ cond_resched();
}
if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
@@ -1170,7 +1157,7 @@ void dm_bufio_prefetch(struct dm_bufio_client *c,
submit_io(b, READ, b->block, read_endio);
dm_bufio_release(b);
- dm_bufio_cond_resched();
+ cond_resched();
if (!n_blocks)
goto flush_plug;
@@ -1291,7 +1278,7 @@ again:
!test_bit(B_WRITING, &b->state))
__relink_lru(b, LIST_CLEAN);
- dm_bufio_cond_resched();
+ cond_resched();
/*
* If we dropped the lock, the list is no longer consistent,
@@ -1574,7 +1561,7 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
freed++;
if (!--nr_to_scan || ((count - freed) <= retain_target))
return freed;
- dm_bufio_cond_resched();
+ cond_resched();
}
}
return freed;
@@ -1808,7 +1795,7 @@ static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
if (__try_evict_buffer(b, 0))
count--;
- dm_bufio_cond_resched();
+ cond_resched();
}
dm_bufio_unlock(c);
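The removed wrapper merely open-coded what cond_resched() already does under CONFIG_PREEMPT_VOLUNTARY, so the long scans over the buffer lists now call cond_resched() directly. A minimal, hypothetical sketch of the resulting pattern (not part of this diff):

/*
 * Illustrative sketch: yield the CPU now and then while walking a
 * potentially long buffer list, as the loops above now do with a
 * plain cond_resched() call.
 */
static void scan_list(struct dm_bufio_client *c, int list_mode)
{
	struct dm_buffer *b;

	list_for_each_entry(b, &c->lru[list_mode], lru_list) {
		/* ... per-buffer work ... */
		cond_resched();	/* reschedules only if need_resched() is set */
	}
}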
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 3970cda10080..695577812cf6 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -140,6 +140,13 @@ struct dm_cache_metadata {
* the device.
*/
bool fail_io:1;
+
+ /*
+ * These structures are used when loading metadata. They're too
+ * big to put on the stack.
+ */
+ struct dm_array_cursor mapping_cursor;
+ struct dm_array_cursor hint_cursor;
};
/*-------------------------------------------------------------------
@@ -1171,31 +1178,37 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
hints_array_initialized(cmd);
}
-static int __load_mapping(void *context, uint64_t cblock, void *leaf)
+static int __load_mapping(struct dm_cache_metadata *cmd,
+ uint64_t cb, bool hints_valid,
+ struct dm_array_cursor *mapping_cursor,
+ struct dm_array_cursor *hint_cursor,
+ load_mapping_fn fn, void *context)
{
int r = 0;
- bool dirty;
- __le64 value;
- __le32 hint_value = 0;
+
+ __le64 mapping;
+ __le32 hint = 0;
+
+ __le64 *mapping_value_le;
+ __le32 *hint_value_le;
+
dm_oblock_t oblock;
unsigned flags;
- struct thunk *thunk = context;
- struct dm_cache_metadata *cmd = thunk->cmd;
- memcpy(&value, leaf, sizeof(value));
- unpack_value(value, &oblock, &flags);
+ dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
+ memcpy(&mapping, mapping_value_le, sizeof(mapping));
+ unpack_value(mapping, &oblock, &flags);
if (flags & M_VALID) {
- if (thunk->hints_valid) {
- r = dm_array_get_value(&cmd->hint_info, cmd->hint_root,
- cblock, &hint_value);
- if (r && r != -ENODATA)
- return r;
+ if (hints_valid) {
+ dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
+ memcpy(&hint, hint_value_le, sizeof(hint));
}
- dirty = thunk->respect_dirty_flags ? (flags & M_DIRTY) : true;
- r = thunk->fn(thunk->context, oblock, to_cblock(cblock),
- dirty, le32_to_cpu(hint_value), thunk->hints_valid);
+ r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
+ le32_to_cpu(hint), hints_valid);
+ if (r)
+ DMERR("policy couldn't load cblock");
}
return r;
@@ -1205,16 +1218,60 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
struct dm_cache_policy *policy,
load_mapping_fn fn, void *context)
{
- struct thunk thunk;
+ int r;
+ uint64_t cb;
+
+ bool hints_valid = hints_array_available(cmd, policy);
+
+ if (from_cblock(cmd->cache_blocks) == 0)
+ /* Nothing to do */
+ return 0;
+
+ r = dm_array_cursor_begin(&cmd->info, cmd->root, &cmd->mapping_cursor);
+ if (r)
+ return r;
- thunk.fn = fn;
- thunk.context = context;
+ if (hints_valid) {
+ r = dm_array_cursor_begin(&cmd->hint_info, cmd->hint_root, &cmd->hint_cursor);
+ if (r) {
+ dm_array_cursor_end(&cmd->mapping_cursor);
+ return r;
+ }
+ }
+
+ for (cb = 0; ; cb++) {
+ r = __load_mapping(cmd, cb, hints_valid,
+ &cmd->mapping_cursor, &cmd->hint_cursor,
+ fn, context);
+ if (r)
+ goto out;
+
+ /*
+ * We need to break out before we move the cursors.
+ */
+ if (cb >= (from_cblock(cmd->cache_blocks) - 1))
+ break;
- thunk.cmd = cmd;
- thunk.respect_dirty_flags = cmd->clean_when_opened;
- thunk.hints_valid = hints_array_available(cmd, policy);
+ r = dm_array_cursor_next(&cmd->mapping_cursor);
+ if (r) {
+ DMERR("dm_array_cursor_next for mapping failed");
+ goto out;
+ }
- return dm_array_walk(&cmd->info, cmd->root, __load_mapping, &thunk);
+ if (hints_valid) {
+ r = dm_array_cursor_next(&cmd->hint_cursor);
+ if (r) {
+ DMERR("dm_array_cursor_next for hint failed");
+ goto out;
+ }
+ }
+ }
+out:
+ dm_array_cursor_end(&cmd->mapping_cursor);
+ if (hints_valid)
+ dm_array_cursor_end(&cmd->hint_cursor);
+
+ return r;
}
int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
@@ -1368,10 +1425,24 @@ int dm_cache_get_metadata_dev_size(struct dm_cache_metadata *cmd,
/*----------------------------------------------------------------*/
-static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
+static int get_hint(uint32_t index, void *value_le, void *context)
+{
+ uint32_t value;
+ struct dm_cache_policy *policy = context;
+
+ value = policy_get_hint(policy, to_cblock(index));
+ *((__le32 *) value_le) = cpu_to_le32(value);
+
+ return 0;
+}
+
+/*
+ * It's quicker to always delete the hint array, and recreate with
+ * dm_array_new().
+ */
+static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
{
int r;
- __le32 value;
size_t hint_size;
const char *policy_name = dm_cache_policy_get_name(policy);
const unsigned *policy_version = dm_cache_policy_get_version(policy);
@@ -1380,63 +1451,23 @@ static int begin_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *po
(strlen(policy_name) > sizeof(cmd->policy_name) - 1))
return -EINVAL;
- if (!policy_unchanged(cmd, policy)) {
- strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
- memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
-
- hint_size = dm_cache_policy_get_hint_size(policy);
- if (!hint_size)
- return 0; /* short-circuit hints initialization */
- cmd->policy_hint_size = hint_size;
+ strncpy(cmd->policy_name, policy_name, sizeof(cmd->policy_name));
+ memcpy(cmd->policy_version, policy_version, sizeof(cmd->policy_version));
- if (cmd->hint_root) {
- r = dm_array_del(&cmd->hint_info, cmd->hint_root);
- if (r)
- return r;
- }
+ hint_size = dm_cache_policy_get_hint_size(policy);
+ if (!hint_size)
+ return 0; /* short-circuit hints initialization */
+ cmd->policy_hint_size = hint_size;
- r = dm_array_empty(&cmd->hint_info, &cmd->hint_root);
+ if (cmd->hint_root) {
+ r = dm_array_del(&cmd->hint_info, cmd->hint_root);
if (r)
return r;
-
- value = cpu_to_le32(0);
- __dm_bless_for_disk(&value);
- r = dm_array_resize(&cmd->hint_info, cmd->hint_root, 0,
- from_cblock(cmd->cache_blocks),
- &value, &cmd->hint_root);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-static int save_hint(void *context, dm_cblock_t cblock, dm_oblock_t oblock, uint32_t hint)
-{
- struct dm_cache_metadata *cmd = context;
- __le32 value = cpu_to_le32(hint);
- int r;
-
- __dm_bless_for_disk(&value);
-
- r = dm_array_set_value(&cmd->hint_info, cmd->hint_root,
- from_cblock(cblock), &value, &cmd->hint_root);
- cmd->changed = true;
-
- return r;
-}
-
-static int write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
-{
- int r;
-
- r = begin_hints(cmd, policy);
- if (r) {
- DMERR("begin_hints failed");
- return r;
}
- return policy_walk_mappings(policy, save_hint, cmd);
+ return dm_array_new(&cmd->hint_info, &cmd->hint_root,
+ from_cblock(cmd->cache_blocks),
+ get_hint, policy);
}
int dm_cache_write_hints(struct dm_cache_metadata *cmd, struct dm_cache_policy *policy)
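The new load path walks the mapping array (and, when the hints are valid, the hint array) with the cursor API instead of dm_array_walk(), advancing both cursors in lock step and breaking out before stepping past the last cache block. A minimal sketch of the begin/get_value/next/end pattern, with a hypothetical visit() callback standing in for __load_mapping() and abbreviated error handling:

/*
 * Illustrative sketch of the new dm_array cursor API; visit() is a
 * hypothetical per-entry callback.
 */
static int walk_array(struct dm_array_info *info, dm_block_t root,
		      uint32_t nr_entries,
		      int (*visit)(uint32_t index, void *value_le, void *context),
		      void *context)
{
	struct dm_array_cursor cursor;
	uint32_t i;
	void *value_le;
	int r;

	if (!nr_entries)
		return 0;		/* nothing to do, as in __load_mappings() */

	r = dm_array_cursor_begin(info, root, &cursor);
	if (r)
		return r;

	for (i = 0; i < nr_entries; i++) {
		dm_array_cursor_get_value(&cursor, &value_le);
		r = visit(i, value_le, context);
		if (r)
			break;

		/* break out before stepping past the final entry */
		if (i + 1 == nr_entries)
			break;

		r = dm_array_cursor_next(&cursor);
		if (r)
			break;
	}

	dm_array_cursor_end(&cursor);
	return r;
}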
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index 14aaaf059f06..2e8a8f1d8358 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -395,7 +395,7 @@ static void init_policy_functions(struct policy *p)
p->policy.set_dirty = wb_set_dirty;
p->policy.clear_dirty = wb_clear_dirty;
p->policy.load_mapping = wb_load_mapping;
- p->policy.walk_mappings = NULL;
+ p->policy.get_hint = NULL;
p->policy.remove_mapping = wb_remove_mapping;
p->policy.writeback_work = wb_writeback_work;
p->policy.force_mapping = wb_force_mapping;
diff --git a/drivers/md/dm-cache-policy-internal.h b/drivers/md/dm-cache-policy-internal.h
index 2816018faa7f..808ee0e2b2c4 100644
--- a/drivers/md/dm-cache-policy-internal.h
+++ b/drivers/md/dm-cache-policy-internal.h
@@ -48,10 +48,10 @@ static inline int policy_load_mapping(struct dm_cache_policy *p,
return p->load_mapping(p, oblock, cblock, hint, hint_valid);
}
-static inline int policy_walk_mappings(struct dm_cache_policy *p,
- policy_walk_fn fn, void *context)
+static inline uint32_t policy_get_hint(struct dm_cache_policy *p,
+ dm_cblock_t cblock)
{
- return p->walk_mappings ? p->walk_mappings(p, fn, context) : 0;
+ return p->get_hint ? p->get_hint(p, cblock) : 0;
}
static inline int policy_writeback_work(struct dm_cache_policy *p,
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index cf48a617a3a4..c33f4a6e1d7d 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -1359,6 +1359,11 @@ static void smq_clear_dirty(struct dm_cache_policy *p, dm_oblock_t oblock)
spin_unlock_irqrestore(&mq->lock, flags);
}
+static unsigned random_level(dm_cblock_t cblock)
+{
+ return hash_32_generic(from_cblock(cblock), 9) & (NR_CACHE_LEVELS - 1);
+}
+
static int smq_load_mapping(struct dm_cache_policy *p,
dm_oblock_t oblock, dm_cblock_t cblock,
uint32_t hint, bool hint_valid)
@@ -1369,47 +1374,21 @@ static int smq_load_mapping(struct dm_cache_policy *p,
e = alloc_particular_entry(&mq->cache_alloc, from_cblock(cblock));
e->oblock = oblock;
e->dirty = false; /* this gets corrected in a minute */
- e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : 1;
+ e->level = hint_valid ? min(hint, NR_CACHE_LEVELS - 1) : random_level(cblock);
push(mq, e);
return 0;
}
-static int smq_save_hints(struct smq_policy *mq, struct queue *q,
- policy_walk_fn fn, void *context)
-{
- int r;
- unsigned level;
- struct entry *e;
-
- for (level = 0; level < q->nr_levels; level++)
- for (e = l_head(q->es, q->qs + level); e; e = l_next(q->es, e)) {
- if (!e->sentinel) {
- r = fn(context, infer_cblock(mq, e),
- e->oblock, e->level);
- if (r)
- return r;
- }
- }
-
- return 0;
-}
-
-static int smq_walk_mappings(struct dm_cache_policy *p, policy_walk_fn fn,
- void *context)
+static uint32_t smq_get_hint(struct dm_cache_policy *p, dm_cblock_t cblock)
{
struct smq_policy *mq = to_smq_policy(p);
- int r = 0;
+ struct entry *e = get_entry(&mq->cache_alloc, from_cblock(cblock));
- /*
- * We don't need to lock here since this method is only called once
- * the IO has stopped.
- */
- r = smq_save_hints(mq, &mq->clean, fn, context);
- if (!r)
- r = smq_save_hints(mq, &mq->dirty, fn, context);
+ if (!e->allocated)
+ return 0;
- return r;
+ return e->level;
}
static void __remove_mapping(struct smq_policy *mq, dm_oblock_t oblock)
@@ -1616,7 +1595,7 @@ static void init_policy_functions(struct smq_policy *mq, bool mimic_mq)
mq->policy.set_dirty = smq_set_dirty;
mq->policy.clear_dirty = smq_clear_dirty;
mq->policy.load_mapping = smq_load_mapping;
- mq->policy.walk_mappings = smq_walk_mappings;
+ mq->policy.get_hint = smq_get_hint;
mq->policy.remove_mapping = smq_remove_mapping;
mq->policy.remove_cblock = smq_remove_cblock;
mq->policy.writeback_work = smq_writeback_work;
diff --git a/drivers/md/dm-cache-policy.h b/drivers/md/dm-cache-policy.h
index 05db56eedb6a..aa10b1493f34 100644
--- a/drivers/md/dm-cache-policy.h
+++ b/drivers/md/dm-cache-policy.h
@@ -90,9 +90,6 @@ struct policy_result {
dm_cblock_t cblock; /* POLICY_HIT, POLICY_NEW, POLICY_REPLACE */
};
-typedef int (*policy_walk_fn)(void *context, dm_cblock_t cblock,
- dm_oblock_t oblock, uint32_t hint);
-
/*
* The cache policy object. Just a bunch of methods. It is envisaged that
* this structure will be embedded in a bigger, policy specific structure
@@ -158,8 +155,11 @@ struct dm_cache_policy {
int (*load_mapping)(struct dm_cache_policy *p, dm_oblock_t oblock,
dm_cblock_t cblock, uint32_t hint, bool hint_valid);
- int (*walk_mappings)(struct dm_cache_policy *p, policy_walk_fn fn,
- void *context);
+ /*
+ * Gets the hint for a given cblock. Called in a single threaded
+ * context. So no locking required.
+ */
+ uint32_t (*get_hint)(struct dm_cache_policy *p, dm_cblock_t cblock);
/*
* Override functions used on the error paths of the core target.
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 0448e7e35c8c..a2768835d394 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -113,8 +113,7 @@ struct iv_tcw_private {
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
- DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
- DM_CRYPT_EXIT_THREAD};
+ DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
/*
* The fields in here must be read only after initialization.
@@ -1207,18 +1206,20 @@ continue_locked:
if (!RB_EMPTY_ROOT(&cc->write_tree))
goto pop_from_list;
- if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
- spin_unlock_irq(&cc->write_thread_wait.lock);
- break;
- }
-
- __set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE);
__add_wait_queue(&cc->write_thread_wait, &wait);
spin_unlock_irq(&cc->write_thread_wait.lock);
+ if (unlikely(kthread_should_stop())) {
+ set_task_state(current, TASK_RUNNING);
+ remove_wait_queue(&cc->write_thread_wait, &wait);
+ break;
+ }
+
schedule();
+ set_task_state(current, TASK_RUNNING);
spin_lock_irq(&cc->write_thread_wait.lock);
__remove_wait_queue(&cc->write_thread_wait, &wait);
goto continue_locked;
@@ -1533,13 +1534,8 @@ static void crypt_dtr(struct dm_target *ti)
if (!cc)
return;
- if (cc->write_thread) {
- spin_lock_irq(&cc->write_thread_wait.lock);
- set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
- wake_up_locked(&cc->write_thread_wait);
- spin_unlock_irq(&cc->write_thread_wait.lock);
+ if (cc->write_thread)
kthread_stop(cc->write_thread);
- }
if (cc->io_queue)
destroy_workqueue(cc->io_queue);
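The crash fix drops the private DM_CRYPT_EXIT_THREAD flag in favour of the standard kthread protocol: the writer thread queues itself on the wait queue and sets its task state before checking kthread_should_stop(), so a kthread_stop() issued from crypt_dtr() cannot slip in between the check and schedule(). A condensed, hypothetical skeleton of that loop, mirroring the hunks above (the real code is the dm-crypt writer thread; rb-tree handling is elided):

/* Illustrative skeleton only, not a drop-in replacement. */
static int write_thread_skeleton(void *data)
{
	struct crypt_config *cc = data;
	DECLARE_WAITQUEUE(wait, current);

	while (1) {
		spin_lock_irq(&cc->write_thread_wait.lock);

		if (!RB_EMPTY_ROOT(&cc->write_tree)) {
			spin_unlock_irq(&cc->write_thread_wait.lock);
			/* ... pop the tree and submit the queued writes ... */
			continue;
		}

		set_current_state(TASK_INTERRUPTIBLE);
		__add_wait_queue(&cc->write_thread_wait, &wait);
		spin_unlock_irq(&cc->write_thread_wait.lock);

		/* checked only after queueing, so kthread_stop() is never lost */
		if (kthread_should_stop()) {
			set_task_state(current, TASK_RUNNING);
			remove_wait_queue(&cc->write_thread_wait, &wait);
			break;
		}

		schedule();

		set_task_state(current, TASK_RUNNING);
		remove_wait_queue(&cc->write_thread_wait, &wait);
	}

	return 0;
}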
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index ac734e5bbe48..e477af8596e2 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -550,9 +550,9 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
pgpath = choose_pgpath(m, nr_bytes);
if (!pgpath) {
- if (!must_push_back_rq(m))
- r = -EIO; /* Failed */
- return r;
+ if (must_push_back_rq(m))
+ return DM_MAPIO_DELAY_REQUEUE;
+ return -EIO; /* Failed */
} else if (test_bit(MPATHF_QUEUE_IO, &m->flags) ||
test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) {
pg_init_all_paths(m);
@@ -680,9 +680,11 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio)
return __multipath_map_bio(m, bio, mpio);
}
-static void process_queued_bios_list(struct multipath *m)
+static void process_queued_io_list(struct multipath *m)
{
- if (m->queue_mode == DM_TYPE_BIO_BASED)
+ if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+ dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
+ else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
@@ -752,7 +754,7 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path,
if (!queue_if_no_path) {
dm_table_run_md_queue_async(m->ti->table);
- process_queued_bios_list(m);
+ process_queued_io_list(m);
}
return 0;
@@ -1193,21 +1195,17 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&m->pg_init_wait, &wait);
+ DEFINE_WAIT(wait);
while (1) {
- set_current_state(TASK_UNINTERRUPTIBLE);
+ prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE);
if (!atomic_read(&m->pg_init_in_progress))
break;
io_schedule();
}
- set_current_state(TASK_RUNNING);
-
- remove_wait_queue(&m->pg_init_wait, &wait);
+ finish_wait(&m->pg_init_wait, &wait);
}
static void flush_multipath_work(struct multipath *m)
@@ -1308,7 +1306,7 @@ out:
spin_unlock_irqrestore(&m->lock, flags);
if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
- process_queued_bios_list(m);
+ process_queued_io_list(m);
}
return r;
@@ -1506,7 +1504,7 @@ static void pg_init_done(void *data, int errors)
}
clear_bit(MPATHF_QUEUE_IO, &m->flags);
- process_queued_bios_list(m);
+ process_queued_io_list(m);
/*
* Wake up any thread waiting to suspend.
@@ -1521,10 +1519,10 @@ static void activate_path(struct work_struct *work)
{
struct pgpath *pgpath =
container_of(work, struct pgpath, activate_path.work);
+ struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);
- if (pgpath->is_active)
- scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
- pg_init_done, pgpath);
+ if (pgpath->is_active && !blk_queue_dying(q))
+ scsi_dh_activate(q, pg_init_done, pgpath);
else
pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
}
@@ -1532,6 +1530,14 @@ static void activate_path(struct work_struct *work)
static int noretry_error(int error)
{
switch (error) {
+ case -EBADE:
+ /*
+ * EBADE signals a reservation conflict.
+ * We shouldn't fail the path here as we can communicate with
+ * the target. We should failover to the next path, but in
+ * doing so we might be causing a ping-pong between paths.
+ * So just return the reservation conflict error.
+ */
case -EOPNOTSUPP:
case -EREMOTEIO:
case -EILSEQ:
@@ -1576,9 +1582,6 @@ static int do_end_io(struct multipath *m, struct request *clone,
if (!test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
if (!must_push_back_rq(m))
r = -EIO;
- } else {
- if (error == -EBADE)
- r = error;
}
}
@@ -1627,9 +1630,6 @@ static int do_end_io_bio(struct multipath *m, struct bio *clone,
if (!must_push_back_bio(m))
return -EIO;
return DM_ENDIO_REQUEUE;
- } else {
- if (error == -EBADE)
- return error;
}
}
@@ -1941,7 +1941,7 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
pg_init_all_paths(m);
dm_table_run_md_queue_async(m->ti->table);
- process_queued_bios_list(m);
+ process_queued_io_list(m);
}
/*
@@ -1994,11 +1994,14 @@ static int multipath_busy(struct dm_target *ti)
struct priority_group *pg, *next_pg;
struct pgpath *pgpath;
- /* pg_init in progress or no paths available */
- if (atomic_read(&m->pg_init_in_progress) ||
- (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)))
+ /* pg_init in progress */
+ if (atomic_read(&m->pg_init_in_progress))
return true;
+ /* no paths available, for blk-mq: rely on IO mapping to delay requeue */
+ if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
+ return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+
/* Guess which priority_group will be used at next mapping time */
pg = lockless_dereference(m->current_pg);
next_pg = lockless_dereference(m->next_pg);
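The pg_init wait above is converted to the stock prepare_to_wait()/finish_wait() helpers, which install autoremove_wake_function(); the shortlog notes the same conversion for the wait loops in dm.c. A minimal sketch of the pattern with hypothetical names:

/*
 * Illustrative sketch only: sleep until a counter drops to zero,
 * woken by whoever decrements it and calls wake_up() on the same
 * wait queue head.
 */
static void wait_for_zero(wait_queue_head_t *wq, atomic_t *count)
{
	DEFINE_WAIT(wait);

	while (1) {
		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);

		if (!atomic_read(count))
			break;

		io_schedule();
	}
	finish_wait(wq, &wait);
}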
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index ee48230a2952..182b67947dad 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -73,15 +73,24 @@ static void dm_old_start_queue(struct request_queue *q)
spin_unlock_irqrestore(q->queue_lock, flags);
}
+static void dm_mq_start_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
+ queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_kick_requeue_list(q);
+}
+
void dm_start_queue(struct request_queue *q)
{
if (!q->mq_ops)
dm_old_start_queue(q);
- else {
- queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
- blk_mq_start_stopped_hw_queues(q, true);
- blk_mq_kick_requeue_list(q);
- }
+ else
+ dm_mq_start_queue(q);
}
static void dm_old_stop_queue(struct request_queue *q)
@@ -89,27 +98,35 @@ static void dm_old_stop_queue(struct request_queue *q)
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
+ if (!blk_queue_stopped(q))
+ blk_stop_queue(q);
+ spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_mq_stop_queue(struct request_queue *q)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(q->queue_lock, flags);
if (blk_queue_stopped(q)) {
spin_unlock_irqrestore(q->queue_lock, flags);
return;
}
- blk_stop_queue(q);
+ queue_flag_set(QUEUE_FLAG_STOPPED, q);
spin_unlock_irqrestore(q->queue_lock, flags);
+
+ /* Avoid that requeuing could restart the queue. */
+ blk_mq_cancel_requeue_work(q);
+ blk_mq_stop_hw_queues(q);
}
void dm_stop_queue(struct request_queue *q)
{
if (!q->mq_ops)
dm_old_stop_queue(q);
- else {
- spin_lock_irq(q->queue_lock);
- queue_flag_set(QUEUE_FLAG_STOPPED, q);
- spin_unlock_irq(q->queue_lock);
-
- blk_mq_cancel_requeue_work(q);
- blk_mq_stop_hw_queues(q);
- }
+ else
+ dm_mq_stop_queue(q);
}
static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
@@ -319,21 +336,32 @@ static void dm_old_requeue_request(struct request *rq)
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void dm_mq_requeue_request(struct request *rq)
+static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
- struct request_queue *q = rq->q;
unsigned long flags;
- blk_mq_requeue_request(rq);
spin_lock_irqsave(q->queue_lock, flags);
if (!blk_queue_stopped(q))
- blk_mq_kick_requeue_list(q);
+ blk_mq_delay_kick_requeue_list(q, msecs);
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void dm_requeue_original_request(struct mapped_device *md,
- struct request *rq)
+void dm_mq_kick_requeue_list(struct mapped_device *md)
+{
+ __dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+}
+EXPORT_SYMBOL(dm_mq_kick_requeue_list);
+
+static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+{
+ blk_mq_requeue_request(rq);
+ __dm_mq_kick_requeue_list(rq->q, msecs);
+}
+
+static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
+ struct mapped_device *md = tio->md;
+ struct request *rq = tio->orig;
int rw = rq_data_dir(rq);
rq_end_stats(md, rq);
@@ -342,7 +370,7 @@ static void dm_requeue_original_request(struct mapped_device *md,
if (!rq->q->mq_ops)
dm_old_requeue_request(rq);
else
- dm_mq_requeue_request(rq);
+ dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
rq_completed(md, rw, false);
}
@@ -372,7 +400,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
return;
else if (r == DM_ENDIO_REQUEUE)
/* The target wants to requeue the I/O */
- dm_requeue_original_request(tio->md, tio->orig);
+ dm_requeue_original_request(tio, false);
else {
DMWARN("unimplemented target endio return value: %d", r);
BUG();
@@ -612,20 +640,23 @@ static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
/*
* Returns:
- * 0 : the request has been processed
- * DM_MAPIO_REQUEUE : the original request needs to be requeued
+ * DM_MAPIO_* : the request has been processed as indicated
+ * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
* < 0 : the request was completed due to failure
*/
-static int map_request(struct dm_rq_target_io *tio, struct request *rq,
- struct mapped_device *md)
+static int map_request(struct dm_rq_target_io *tio)
{
int r;
struct dm_target *ti = tio->ti;
+ struct mapped_device *md = tio->md;
+ struct request *rq = tio->orig;
struct request *clone = NULL;
if (tio->clone) {
clone = tio->clone;
r = ti->type->map_rq(ti, clone, &tio->info);
+ if (r == DM_MAPIO_DELAY_REQUEUE)
+ return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
} else {
r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
if (r < 0) {
@@ -633,9 +664,8 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
dm_kill_unmapped_request(rq, r);
return r;
}
- if (r != DM_MAPIO_REMAPPED)
- return r;
- if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+ if (r == DM_MAPIO_REMAPPED &&
+ setup_clone(clone, rq, tio, GFP_ATOMIC)) {
/* -ENOMEM */
ti->type->release_clone_rq(clone);
return DM_MAPIO_REQUEUE;
@@ -654,7 +684,10 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
break;
case DM_MAPIO_REQUEUE:
/* The target wants to requeue the I/O */
- dm_requeue_original_request(md, tio->orig);
+ break;
+ case DM_MAPIO_DELAY_REQUEUE:
+ /* The target wants to requeue the I/O after a delay */
+ dm_requeue_original_request(tio, true);
break;
default:
if (r > 0) {
@@ -664,10 +697,9 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
/* The target wants to complete the I/O */
dm_kill_unmapped_request(rq, r);
- return r;
}
- return 0;
+ return r;
}
static void dm_start_request(struct mapped_device *md, struct request *orig)
@@ -706,11 +738,9 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
static void map_tio_request(struct kthread_work *work)
{
struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
- struct request *rq = tio->orig;
- struct mapped_device *md = tio->md;
- if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
- dm_requeue_original_request(md, rq);
+ if (map_request(tio) == DM_MAPIO_REQUEUE)
+ dm_requeue_original_request(tio, false);
}
ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
@@ -896,7 +926,7 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
tio->ti = ti;
/* Direct call is fine since .queue_rq allows allocations */
- if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+ if (map_request(tio) == DM_MAPIO_REQUEUE) {
/* Undo dm_start_request() before requeuing */
rq_end_stats(md, rq);
rq_completed(md, rq_data_dir(rq), false);
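dm_mq_kick_requeue_list() is exported so that dm-mpath's process_queued_io_list() (earlier in this series) can restart requests the core parked on the blk-mq requeue list, for example once a path returns or 'queue_if_no_path' is cleared. A minimal usage sketch with a hypothetical wrapper name:

/*
 * Illustrative usage (hypothetical wrapper): dispatch anything sitting
 * on the mapped device's blk-mq requeue list without further delay.
 */
static void restart_delayed_requests(struct multipath *m)
{
	if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
		dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
}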
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index