summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 14:27:02 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-05 14:27:02 -0700
commit3526dd0c7832f1011a0477cc6d903662bae05ea8 (patch)
tree22fbac64eb40a0b29bfa4c029695f39b2f591e62 /drivers/md
parentdd972f924df6bdbc0ab185a38d5d2361dbc26311 (diff)
parentbc6d65e6dc89c3b7ff78e4ad797117c122ffde8e (diff)
Merge tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block
Pull block layer updates from Jens Axboe: "It's a pretty quiet round this time, which is nice. This contains: - series from Bart, cleaning up the way we set/test/clear atomic queue flags. - series from Bart, fixing races between gendisk and queue registration and removal. - set of bcache fixes and improvements from various folks, by way of Michael Lyle. - set of lightnvm updates from Matias, most of it being the 1.2 to 2.0 transition. - removal of unused DIO flags from Nikolay. - blk-mq/sbitmap memory ordering fixes from Omar. - divide-by-zero fix for BFQ from Paolo. - minor documentation patches from Randy. - timeout fix from Tejun. - Alpha "can't write a char atomically" fix from Mikulas. - set of NVMe fixes by way of Keith. - bsg and bsg-lib improvements from Christoph. - a few sed-opal fixes from Jonas. - cdrom check-disk-change deadlock fix from Maurizio. - various little fixes, comment fixes, etc from various folks" * tag 'for-4.17/block-20180402' of git://git.kernel.dk/linux-block: (139 commits) blk-mq: Directly schedule q->timeout_work when aborting a request blktrace: fix comment in blktrace_api.h lightnvm: remove function name in strings lightnvm: pblk: remove some unnecessary NULL checks lightnvm: pblk: don't recover unwritten lines lightnvm: pblk: implement 2.0 support lightnvm: pblk: implement get log report chunk lightnvm: pblk: rename ppaf* to addrf* lightnvm: pblk: check for supported version lightnvm: implement get log report chunk helpers lightnvm: make address conversions depend on generic device lightnvm: add support for 2.0 address format lightnvm: normalize geometry nomenclature lightnvm: complete geo structure with maxoc* lightnvm: add shorten OCSSD version in geo lightnvm: add minor version to generic geometry lightnvm: simplify geometry structure lightnvm: pblk: refactor init/exit sequences lightnvm: Avoid validation of default op value lightnvm: centralize permission check for lightnvm ioctl ...
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/alloc.c3
-rw-r--r--drivers/md/bcache/bcache.h57
-rw-r--r--drivers/md/bcache/bset.c4
-rw-r--r--drivers/md/bcache/bset.h5
-rw-r--r--drivers/md/bcache/btree.c26
-rw-r--r--drivers/md/bcache/closure.c17
-rw-r--r--drivers/md/bcache/closure.h5
-rw-r--r--drivers/md/bcache/debug.c14
-rw-r--r--drivers/md/bcache/extents.c2
-rw-r--r--drivers/md/bcache/io.c16
-rw-r--r--drivers/md/bcache/journal.c8
-rw-r--r--drivers/md/bcache/request.c186
-rw-r--r--drivers/md/bcache/super.c160
-rw-r--r--drivers/md/bcache/sysfs.c55
-rw-r--r--drivers/md/bcache/util.c25
-rw-r--r--drivers/md/bcache/util.h6
-rw-r--r--drivers/md/bcache/writeback.c92
-rw-r--r--drivers/md/bcache/writeback.h4
-rw-r--r--drivers/md/dm-table.c16
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/md/md-linear.c4
-rw-r--r--drivers/md/md.c10
-rw-r--r--drivers/md/raid0.c4
-rw-r--r--drivers/md/raid1.c6
-rw-r--r--drivers/md/raid10.c6
-rw-r--r--drivers/md/raid5.c4
26 files changed, 582 insertions, 155 deletions
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 458e1d38577d..004cc3cc6123 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -287,7 +287,8 @@ do { \
break; \
\
mutex_unlock(&(ca)->set->bucket_lock); \
- if (kthread_should_stop()) { \
+ if (kthread_should_stop() || \
+ test_bit(CACHE_SET_IO_DISABLE, &ca->set->flags)) { \
set_current_state(TASK_RUNNING); \
return 0; \
} \
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 12e5197f186c..d338b7086013 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -188,6 +188,7 @@
#include <linux/refcount.h>
#include <linux/types.h>
#include <linux/workqueue.h>
+#include <linux/kthread.h>
#include "bset.h"
#include "util.h"
@@ -258,10 +259,11 @@ struct bcache_device {
struct gendisk *disk;
unsigned long flags;
-#define BCACHE_DEV_CLOSING 0
-#define BCACHE_DEV_DETACHING 1
-#define BCACHE_DEV_UNLINK_DONE 2
-
+#define BCACHE_DEV_CLOSING 0
+#define BCACHE_DEV_DETACHING 1
+#define BCACHE_DEV_UNLINK_DONE 2
+#define BCACHE_DEV_WB_RUNNING 3
+#define BCACHE_DEV_RATE_DW_RUNNING 4
unsigned nr_stripes;
unsigned stripe_size;
atomic_t *stripe_sectors_dirty;
@@ -286,6 +288,12 @@ struct io {
sector_t last;
};
+enum stop_on_failure {
+ BCH_CACHED_DEV_STOP_AUTO = 0,
+ BCH_CACHED_DEV_STOP_ALWAYS,
+ BCH_CACHED_DEV_STOP_MODE_MAX,
+};
+
struct cached_dev {
struct list_head list;
struct bcache_device disk;
@@ -359,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
+ unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;
@@ -378,6 +387,11 @@ struct cached_dev {
unsigned writeback_rate_i_term_inverse;
unsigned writeback_rate_p_term_inverse;
unsigned writeback_rate_minimum;
+
+ enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned error_limit;
};
enum alloc_reserve {
@@ -474,10 +488,15 @@ struct gc_stat {
*
* CACHE_SET_RUNNING means all cache devices have been registered and journal
* replay is complete.
+ *
+ * CACHE_SET_IO_DISABLE is set when bcache is stopping the whold cache set, all
+ * external and internal I/O should be denied when this flag is set.
+ *
*/
#define CACHE_SET_UNREGISTERING 0
#define CACHE_SET_STOPPING 1
#define CACHE_SET_RUNNING 2
+#define CACHE_SET_IO_DISABLE 3
struct cache_set {
struct closure cl;
@@ -867,8 +886,36 @@ static inline void wake_up_allocators(struct cache_set *c)
wake_up_process(ca->alloc_thread);
}
+static inline void closure_bio_submit(struct cache_set *c,
+ struct bio *bio,
+ struct closure *cl)
+{
+ closure_get(cl);
+ if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return;
+ }
+ generic_make_request(bio);
+}
+
+/*
+ * Prevent the kthread exits directly, and make sure when kthread_stop()
+ * is called to stop a kthread, it is still alive. If a kthread might be
+ * stopped by CACHE_SET_IO_DISABLE bit set, wait_for_kthread_stop() is
+ * necessary before the kthread returns.
+ */
+static inline void wait_for_kthread_stop(void)
+{
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+}
+
/* Forward declarations */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
@@ -896,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
+bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -905,6 +953,7 @@ void bch_write_bdev_super(struct cached_dev *, struct closure *);
extern struct workqueue_struct *bcache_wq;
extern const char * const bch_cache_modes[];
+extern const char * const bch_stop_on_failure_modes[];
extern struct mutex bch_register_lock;
extern struct list_head bch_cache_sets;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index e56d3ecdbfcb..579c696a5fe0 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1072,7 +1072,7 @@ EXPORT_SYMBOL(bch_btree_iter_init);
static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
btree_iter_cmp_fn *cmp)
{
- struct btree_iter_set unused;
+ struct btree_iter_set b __maybe_unused;
struct bkey *ret = NULL;
if (!btree_iter_end(iter)) {
@@ -1087,7 +1087,7 @@ static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
}
if (iter->data->k == iter->data->end)
- heap_pop(iter, unused, cmp);
+ heap_pop(iter, b, cmp);
else
heap_sift(iter, 0, cmp);
}
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index fa506c1aa524..0c24280f3b98 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -531,14 +531,15 @@ int __bch_keylist_realloc(struct keylist *, unsigned);
#ifdef CONFIG_BCACHE_DEBUG
int __bch_count_data(struct btree_keys *);
-void __bch_check_keys(struct btree_keys *, const char *, ...);
+void __printf(2, 3) __bch_check_keys(struct btree_keys *, const char *, ...);
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
void bch_dump_bucket(struct btree_keys *);
#else
static inline int __bch_count_data(struct btree_keys *b) { return -1; }
-static inline void __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
+static inline void __printf(2, 3)
+ __bch_check_keys(struct btree_keys *b, const char *fmt, ...) {}
static inline void bch_dump_bucket(struct btree_keys *b) {}
void bch_dump_bset(struct btree_keys *, struct bset *, unsigned);
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index fad9fe8817eb..17936b2dc7d6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -665,6 +665,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
struct btree *b, *t;
unsigned long i, nr = sc->nr_to_scan;
unsigned long freed = 0;
+ unsigned int btree_cache_used;
if (c->shrinker_disabled)
return SHRINK_STOP;
@@ -689,9 +690,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
nr = min_t(unsigned long, nr, mca_can_free(c));
i = 0;
+ btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
- if (freed >= nr)
- break;
+ if (nr <= 0)
+ goto out;
if (++i > 3 &&
!mca_reap(b, 0, false)) {
@@ -699,9 +701,10 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
rw_unlock(true, b);
freed++;
}
+ nr--;
}
- for (i = 0; (nr--) && i < c->btree_cache_used; i++) {
+ for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
goto out;
@@ -719,7 +722,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
}
out:
mutex_unlock(&c->bucket_lock);
- return freed;
+ return freed * c->btree_pages;
}
static unsigned long bch_mca_count(struct shrinker *shrink,
@@ -959,7 +962,7 @@ err:
return b;
}
-/**
+/*
* bch_btree_node_get - find a btree node in the cache and lock it, reading it
* in from disk if necessary.
*
@@ -1744,6 +1747,7 @@ static void bch_btree_gc(struct cache_set *c)
btree_gc_start(c);
+ /* if CACHE_SET_IO_DISABLE set, gc thread should stop too */
do {
ret = btree_root(gc_root, c, &op, &writes, &stats);
closure_sync(&writes);
@@ -1751,7 +1755,7 @@ static void bch_btree_gc(struct cache_set *c)
if (ret && ret != -EAGAIN)
pr_warn("gc failed!");
- } while (ret);
+ } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
wake_up_allocators(c);
@@ -1789,15 +1793,19 @@ static int bch_gc_thread(void *arg)
while (1) {
wait_event_interruptible(c->gc_wait,
- kthread_should_stop() || gc_should_run(c));
+ kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags) ||
+ gc_should_run(c));
- if (kthread_should_stop())
+ if (kthread_should_stop() ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags))
break;
set_gc_sectors(c);
bch_btree_gc(c);
}
+ wait_for_kthread_stop();
return 0;
}
@@ -2170,7 +2178,7 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
if (b->key.ptr[0] != btree_ptr ||
b->seq != seq + 1) {
- op->lock = b->level;
+ op->lock = b->level;
goto out;
}
}
diff --git a/drivers/md/bcache/closure.c b/drivers/md/bcache/closure.c
index 7f12920c14f7..0e14969182c6 100644
--- a/drivers/md/bcache/closure.c
+++ b/drivers/md/bcache/closure.c
@@ -46,7 +46,7 @@ void closure_sub(struct closure *cl, int v)
}
EXPORT_SYMBOL(closure_sub);
-/**
+/*
* closure_put - decrement a closure's refcount
*/
void closure_put(struct closure *cl)
@@ -55,7 +55,7 @@ void closure_put(struct closure *cl)
}
EXPORT_SYMBOL(closure_put);
-/**
+/*
* closure_wake_up - wake up all closures on a wait list, without memory barrier
*/
void __closure_wake_up(struct closure_waitlist *wait_list)
@@ -79,9 +79,9 @@ EXPORT_SYMBOL(__closure_wake_up);
/**
* closure_wait - add a closure to a waitlist
- *
- * @waitlist will own a ref on @cl, which will be released when
+ * @waitlist: will own a ref on @cl, which will be released when
* closure_wake_up() is called on @waitlist.
+ * @cl: closure pointer.
*
*/
bool closure_wait(struct closure_waitlist *waitlist, struct closure *cl)
@@ -157,7 +157,7 @@ void closure_debug_destroy(struct closure *cl)
}
EXPORT_SYMBOL(closure_debug_destroy);
-static struct dentry *debug;
+static struct dentry *closure_debug;
static int debug_seq_show(struct seq_file *f, void *data)
{
@@ -199,11 +199,12 @@ static const struct file_operations debug_ops = {
.release = single_release
};
-void __init closure_debug_init(void)
+int __init closure_debug_init(void)
{
- debug = debugfs_create_file("closures", 0400, NULL, NULL, &debug_ops);
+ closure_debug = debugfs_create_file("closures",
+ 0400, bcache_debug, NULL, &debug_ops);
+ return IS_ERR_OR_NULL(closure_debug);
}
-
#endif
MODULE_AUTHOR("Kent Overstreet <koverstreet@google.com>");
diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h
index 3b9dfc9962ad..71427eb5fdae 100644
--- a/drivers/md/bcache/closure.h
+++ b/drivers/md/bcache/closure.h
@@ -105,6 +105,7 @@
struct closure;
struct closure_syncer;
typedef void (closure_fn) (struct closure *);
+extern struct dentry *bcache_debug;
struct closure_waitlist {
struct llist_head list;
@@ -185,13 +186,13 @@ static inline void closure_sync(struct closure *cl)
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
-void closure_debug_init(void);
+int closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);
#else
-static inline void closure_debug_init(void) {}
+static inline int closure_debug_init(void) { return 0; }
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}
diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c
index af89408befe8..028f7b386e01 100644
--- a/drivers/md/bcache/debug.c
+++ b/drivers/md/bcache/debug.c
@@ -17,7 +17,7 @@
#include <linux/random.h>
#include <linux/seq_file.h>
-static struct dentry *debug;
+struct dentry *bcache_debug;
#ifdef CONFIG_BCACHE_DEBUG
@@ -232,11 +232,11 @@ static const struct file_operations cache_set_debug_ops = {
void bch_debug_init_cache_set(struct cache_set *c)
{
- if (!IS_ERR_OR_NULL(debug)) {
+ if (!IS_ERR_OR_NULL(bcache_debug)) {
char name[50];
snprintf(name, 50, "bcache-%pU", c->sb.set_uuid);
- c->debug = debugfs_create_file(name, 0400, debug, c,
+ c->debug = debugfs_create_file(name, 0400, bcache_debug, c,
&cache_set_debug_ops);
}
}
@@ -245,13 +245,13 @@ void bch_debug_init_cache_set(struct cache_set *c)
void bch_debug_exit(void)
{
- if (!IS_ERR_OR_NULL(debug))
- debugfs_remove_recursive(debug);
+ if (!IS_ERR_OR_NULL(bcache_debug))
+ debugfs_remove_recursive(bcache_debug);
}
int __init bch_debug_init(struct kobject *kobj)
{
- debug = debugfs_create_dir("bcache", NULL);
+ bcache_debug = debugfs_create_dir("bcache", NULL);
- return IS_ERR_OR_NULL(debug);
+ return IS_ERR_OR_NULL(bcache_debug);
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index f9d391711595..c334e6666461 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -534,7 +534,6 @@ err:
static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
{
struct btree *b = container_of(bk, struct btree, keys);
- struct bucket *g;
unsigned i, stale;
if (!KEY_PTRS(k) ||
@@ -549,7 +548,6 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
return false;
for (i = 0; i < KEY_PTRS(k); i++) {
- g = PTR_BUCKET(b->c, k, i);
stale = ptr_stale(b->c, k, i);
btree_bug_on(stale > 96, b,
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index a783c5a41ff1..7fac97ae036e 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -38,7 +38,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c)
bio_set_dev(bio, PTR_CACHE(c, &b->key, 0)->bdev);
b->submit_time_us = local_clock_us();
- closure_bio_submit(bio, bio->bi_private);
+ closure_bio_submit(c, bio, bio->bi_private);
}
void bch_submit_bbio(struct bio *bio, struct cache_set *c,
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}
/* IO errors */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
+{
+ char buf[BDEVNAME_SIZE];
+ unsigned errors;
+
+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
+
+ errors = atomic_add_return(1, &dc->io_errors);
+ if (errors < dc->error_limit)
+ pr_err("%s: IO error on backing device, unrecoverable",
+ bio_devname(bio, buf));
+ else
+ bch_cached_dev_error(dc);
+}
void bch_count_io_errors(struct cache *ca,
blk_status_t error,
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 1b736b860739..18f1b5239620 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -62,7 +62,7 @@ reread: left = ca->sb.bucket_size - offset;
bio_set_op_attrs(bio, REQ_OP_READ, 0);
bch_bio_map(bio, data);
- closure_bio_submit(bio, &cl);
+ closure_bio_submit(ca->set, bio, &cl);
closure_sync(&cl);
/* This function could be simpler now since we no longer write
@@ -493,7 +493,7 @@ static void journal_reclaim(struct cache_set *c)
struct cache *ca;
uint64_t last_seq;
unsigned iter, n = 0;
- atomic_t p;
+ atomic_t p __maybe_unused;
atomic_long_inc(&c->reclaim);
@@ -594,6 +594,7 @@ static void journal_write_done(struct closure *cl)
}
static void journal_write_unlock(struct closure *cl)
+ __releases(&c->journal.lock)
{
struct cache_set *c = container_of(cl, struct cache_set, journal.io);
@@ -674,7 +675,7 @@ static void journal_write_unlocked(struct closure *cl)
spin_unlock(&c->journal.lock);
while ((bio = bio_list_pop(&list)))
- closure_bio_submit(bio, cl);
+ closure_bio_submit(c, bio, cl);
continue_at(cl, journal_write_done, NULL);
}
@@ -705,6 +706,7 @@ static void journal_try_write(struct cache_set *c)
static struct journal_write *journal_wait_for_write(struct cache_set *c,
unsigned nkeys)
+ __acquires(&c->journal.lock)
{
size_t sectors;
struct closure cl;
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 6422846b546e..a65e3365eeb9 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -139,6 +139,7 @@ static void bch_data_invalidate(struct closure *cl)
}
op->insert_data_done = true;
+ /* get in bch_data_insert() */
bio_put(bio);
out:
continue_at(cl, bch_data_insert_keys, op->wq);
@@ -295,6 +296,7 @@ err:
/**
* bch_data_insert - stick some data in the cache
+ * @cl: closure pointer.
*
* This is the starting point for any data to end up in a cache device; it could
* be from a normal write, or a writeback write, or a write to a flash only
@@ -630,6 +632,41 @@ static void request_endio(struct bio *bio)
closure_put(cl);
}
+static void backing_request_endio(struct bio *bio)
+{
+ struct closure *cl = bio->bi_private;
+
+ if (bio->bi_status) {
+ struct search *s = container_of(cl, struct search, cl);
+ struct cached_dev *dc = container_of(s->d,
+ struct cached_dev, disk);
+ /*
+ * If a bio has REQ_PREFLUSH for writeback mode, it is
+ * speically assembled in cached_dev_write() for a non-zero
+ * write request which has REQ_PREFLUSH. we don't set
+ * s->iop.status by this failure, the status will be decided
+ * by result of bch_data_insert() operation.
+ */
+ if (unlikely(s->iop.writeback &&
+ bio->bi_opf & REQ_PREFLUSH)) {
+ char buf[BDEVNAME_SIZE];
+
+ bio_devname(bio, buf);
+ pr_err("Can't flush %s: returned bi_status %i",
+ buf, bio->bi_status);
+ } else {
+ /* set to orig_bio->bi_status in bio_complete() */
+ s->iop.status = bio->bi_status;
+ }
+ s->recoverable = false;
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ bio_put(bio);
+ closure_put(cl);
+}
+
static void bio_complete(struct search *s)
{
if (s->orig_bio) {
@@ -644,13 +681,21 @@ static void bio_complete(struct search *s)
}
}
-static void do_bio_hook(struct search *s, struct bio *orig_bio)
+static void do_bio_hook(struct search *s,
+ struct bio *orig_bio,
+ bio_end_io_t *end_io_fn)
{
struct bio *bio = &s->bio.bio;
bio_init(bio, NULL, 0);
__bio_clone_fast(bio, orig_bio);
- bio->bi_end_io = request_endio;
+ /*
+ * bi_end_io can be set separately somewhere else, e.g. the
+ * variants in,
+ * - cache_bio->bi_end_io from cached_dev_cache_miss()
+ * - n->bi_end_io from cache_lookup_fn()
+ */
+ bio->bi_end_io = end_io_fn;
bio->bi_private = &s->cl;
bio_cnt_set(bio, 3);
@@ -676,7 +721,7 @@ static inline struct search *search_alloc(struct bio *bio,
s = mempool_alloc(d->c->search, GFP_NOIO);
closure_init(&s->cl, NULL);
- do_bio_hook(s, bio);
+ do_bio_hook(s, bio, request_endio);
s->orig_bio = bio;
s->cache_miss = NULL;
@@ -743,11 +788,12 @@ static void cached_dev_read_error(struct closure *cl)
trace_bcache_read_retry(s->orig_bio);
s->iop.status = 0;
- do_bio_hook(s, s->orig_bio);
+ do_bio_hook(s, s->orig_bio, backing_request_endio);
/* XXX: invalidate cache */
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, bio, cl);
}
continue_at(cl, cached_dev_cache_miss_done, NULL);
@@ -859,7 +905,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
bio_copy_dev(cache_bio, miss);
cache_bio->bi_iter.bi_size = s->insert_bio_sectors << 9;
- cache_bio->bi_end_io = request_endio;
+ cache_bio->bi_end_io = backing_request_endio;
cache_bio->bi_private = &s->cl;
bch_bio_map(cache_bio, NULL);
@@ -872,15 +918,17 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s,
s->cache_miss = miss;
s->iop.bio = cache_bio;
bio_get(cache_bio);
- closure_bio_submit(cache_bio, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, cache_bio, &s->cl);
return ret;
out_put:
bio_put(cache_bio);
out_submit:
- miss->bi_end_io = request_endio;
+ miss->bi_end_io = backing_request_endio;
miss->bi_private = &s->cl;
- closure_bio_submit(miss, &s->cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, miss, &s->cl);
return ret;
}
@@ -943,31 +991,46 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
s->iop.bio = s->orig_bio;
bio_get(s->iop.bio);
- if ((bio_op(bio) != REQ_OP_DISCARD) ||
- blk_queue_discard(bdev_get_queue(dc->bdev)))
- closure_bio_submit(bio, cl);
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ goto insert_data;
+
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
+
} else if (s->iop.writeback) {
bch_writeback_add(dc);
s->iop.bio = bio;
if (bio->bi_opf & REQ_PREFLUSH) {
- /* Also need to send a flush to the backing device */
- struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
- dc->disk.bio_split);
-
+ /*
+ * Also need to send a flush to the backing
+ * device.
+ */
+ struct bio *flush;
+
+ flush = bio_alloc_bioset(GFP_NOIO, 0,
+ dc->disk.bio_split);
+ if (!flush) {
+ s->iop.status = BLK_STS_RESOURCE;
+ goto insert_data;
+ }
bio_copy_dev(flush, bio);
- flush->bi_end_io = request_endio;
+ flush->bi_end_io = backing_request_endio;
flush->bi_private = cl;
flush->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-
- closure_bio_submit(flush, cl);
+ /* I/O request sent to backing device */
+ closure_bio_submit(s->iop.c, flush, cl);
}
} else {
s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split);
-
- closure_bio_submit(bio, cl);
+ /* I/O request sent to backing device */
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
}
+insert_data:
closure_call(&s->iop.cl, bch_data_insert, NULL, cl);
continue_at(cl, cached_dev_write_complete, NULL);
}
@@ -981,11 +1044,67 @@ static void cached_dev_nodata(struct closure *cl)
bch_journal_meta(s->iop.c, cl);
/* If it's a flush, we send the flush to the backing device too */
- closure_bio_submit(bio, cl);
+ bio->bi_end_io = backing_request_endio;
+ closure_bio_submit(s->iop.c, bio, cl);
continue_at(cl, cached_dev_bio_complete, NULL);
}
+struct detached_dev_io_private {
+ struct bcache_device *d;
+ unsigned long start_time;
+ bio_end_io_t *bi_end_io;
+ void *bi_private;
+};
+
+static void detached_dev_end_io(struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+
+ ddip = bio->bi_private;
+ bio->bi_end_io = ddip->bi_end_io;
+ bio->bi_private = ddip->bi_private;
+
+ generic_end_io_acct(ddip->d->disk->queue,
+ bio_data_dir(bio),
+ &ddip->d->disk->part0, ddip->start_time);
+
+ if (bio->bi_status) {
+ struct cached_dev *dc = container_of(ddip->d,
+ struct cached_dev, disk);
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
+ kfree(ddip);
+ bio->bi_end_io(bio);
+}
+
+static void detached_dev_do_request(struct bcache_device *d, struct bio *bio)
+{
+ struct detached_dev_io_private *ddip;
+ struct cached_dev *dc = container_of(d, struct cached_dev, disk);
+
+ /*
+ * no need to call closure_get(&dc->disk.cl),
+ * because upper layer had already opened bcache device,
+ * which would call closure_get(&dc->disk.cl)
+ */
+ ddip = kzalloc(sizeof(struct detached_dev_io_private), GFP_NOIO);
+ ddip->d = d;
+ ddip->start_time = jiffies;
+ ddip->bi_end_io = bio->bi_end_io;
+ ddip->bi_private = bio->bi_private;
+ bio->bi_end_io = detached_dev_end_io;
+ bio->bi_private = ddip;
+
+ if ((bio_op(bio) == REQ_OP_DISCARD) &&
+ !blk_queue_discard(bdev_get_queue(dc->bdev)))
+ bio->bi_end_io(bio);
+ else
+ generic_make_request(bio);
+}
+
/* Cached devices - read & write stuff */
static blk_qc_t cached_dev_make_request(struct request_queue *q,
@@ -996,6 +1115,13 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
+ dc->io_disable)) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
atomic_set(&dc->backing_idle, 0);
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
@@ -1022,13 +1148,9 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
else
cached_dev_read(dc, s);
}
- } else {
- if ((bio_op(bio) == REQ_OP_DISCARD) &&
- !blk_queue_discard(bdev_get_queue(dc->bdev)))
- bio_endio(bio);
- else
- generic_make_request(bio);
- }
+ } else
+ /* I/O request sent to backing device */
+ detached_dev_do_request(d, bio);
return BLK_QC_T_NONE;
}
@@ -1112,6 +1234,12 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
struct bcache_device *d = bio->bi_disk->private_data;
int rw = bio_data_dir(bio);
+ if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ return BLK_QC_T_NONE;
+ }
+
generic_start_io_acct(q, rw, bio_sectors(bio), &d->disk->part0);
s = search_alloc(bio, d);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index f2273143b3cb..d90d9e59ca00 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -47,6 +47,14 @@ const char * const bch_cache_modes[] = {
NULL
};
+/* Default is -1; we skip past it for stop_when_cache_s