diff options
Diffstat (limited to 'drivers/md')
69 files changed, 725 insertions, 491 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 4a249ee86364..83b9362be09c 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -178,7 +178,7 @@ config MD_FAULTY config MD_CLUSTER - tristate "Cluster Support for MD (EXPERIMENTAL)" + tristate "Cluster Support for MD" depends on BLK_DEV_MD depends on DLM default n @@ -188,7 +188,8 @@ config MD_CLUSTER nodes in the cluster can access the MD devices simultaneously. This brings the redundancy (and uptime) of RAID levels across the - nodes of the cluster. + nodes of the cluster. Currently, it can work with raid1 and raid10 + (limited support). If unsure, say N. diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 786ec9e86d65..f701bb211783 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the kernel software RAID and LVM drivers. # @@ -18,9 +19,12 @@ dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o \ dm-cache-smq-y += dm-cache-policy-smq.o dm-era-y += dm-era-target.o dm-verity-y += dm-verity-target.o -md-mod-y += md.o bitmap.o +md-mod-y += md.o md-bitmap.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o +linear-y += md-linear.o +multipath-y += md-multipath.o +faulty-y += md-faulty.o # Note: link order is important. All raid personalities # and must come before md.o, as they each initialise diff --git a/drivers/md/bcache/Makefile b/drivers/md/bcache/Makefile index c488b846f831..d26b35195825 100644 --- a/drivers/md/bcache/Makefile +++ b/drivers/md/bcache/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_BCACHE) += bcache.o diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index cacbe2dbd5c3..a27d85232ce1 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Primary bucket allocation code * @@ -406,7 +407,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait) finish_wait(&ca->set->bucket_wait, &w); out: - wake_up_process(ca->alloc_thread); + if (ca->alloc_thread) + wake_up_process(ca->alloc_thread); trace_bcache_alloc(ca, reserve); @@ -441,6 +443,11 @@ out: b->prio = INITIAL_PRIO; } + if (ca->set->avail_nbuckets > 0) { + ca->set->avail_nbuckets--; + bch_update_bucket_in_use(ca->set, &ca->set->gc_stats); + } + return r; } @@ -448,6 +455,11 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b) { SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); + + if (ca->set->avail_nbuckets < ca->set->nbuckets) { + ca->set->avail_nbuckets++; + bch_update_bucket_in_use(ca->set, &ca->set->gc_stats); + } } void bch_bucket_free(struct cache_set *c, struct bkey *k) @@ -600,7 +612,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors, /* * If we had to allocate, we might race and not need to allocate the - * second time we call find_data_bucket(). If we allocated a bucket but + * second time we call pick_data_bucket(). If we allocated a bucket but * didn't use it, drop the refcount bch_bucket_alloc_set() took: */ if (KEY_PTRS(&alloc.key)) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 2ed9bd231d84..843877e017e1 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_H #define _BCACHE_H @@ -184,6 +185,7 @@ #include <linux/mutex.h> #include <linux/rbtree.h> #include <linux/rwsem.h> +#include <linux/refcount.h> #include <linux/types.h> #include <linux/workqueue.h> @@ -265,9 +267,6 @@ struct bcache_device { atomic_t *stripe_sectors_dirty; unsigned long *full_dirty_stripes; - unsigned long sectors_dirty_last; - long sectors_dirty_derivative; - struct bio_set *bio_split; unsigned data_csum:1; @@ -299,7 +298,7 @@ struct cached_dev { struct semaphore sb_write_mutex; /* Refcount on the cache set. Always nonzero when we're caching. */ - atomic_t count; + refcount_t count; struct work_struct detach; /* @@ -362,12 +361,14 @@ struct cached_dev { uint64_t writeback_rate_target; int64_t writeback_rate_proportional; - int64_t writeback_rate_derivative; - int64_t writeback_rate_change; + int64_t writeback_rate_integral; + int64_t writeback_rate_integral_scaled; + int32_t writeback_rate_change; unsigned writeback_rate_update_seconds; - unsigned writeback_rate_d_term; + unsigned writeback_rate_i_term_inverse; unsigned writeback_rate_p_term_inverse; + unsigned writeback_rate_minimum; }; enum alloc_reserve { @@ -581,6 +582,7 @@ struct cache_set { uint8_t need_gc; struct gc_stat gc_stats; size_t nbuckets; + size_t avail_nbuckets; struct task_struct *gc_thread; /* Where in the btree gc currently is */ @@ -806,13 +808,13 @@ do { \ static inline void cached_dev_put(struct cached_dev *dc) { - if (atomic_dec_and_test(&dc->count)) + if (refcount_dec_and_test(&dc->count)) schedule_work(&dc->detach); } static inline bool cached_dev_get(struct cached_dev *dc) { - if (!atomic_inc_not_zero(&dc->count)) + if (!refcount_inc_not_zero(&dc->count)) return false; /* Paired with the mb in cached_dev_attach */ diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 18526d44688d..e56d3ecdbfcb 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Code for working with individual keys, and sorted sets of keys with in a * btree node diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h index ae964624efb2..fa506c1aa524 100644 --- a/drivers/md/bcache/bset.h +++ b/drivers/md/bcache/bset.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_BSET_H #define _BCACHE_BSET_H diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 866dcf78ff8e..11c5503d31dc 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> * @@ -1240,6 +1241,11 @@ void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k) __bch_btree_mark_key(c, level, k); } +void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats) +{ + stats->in_use = (c->nbuckets - c->avail_nbuckets) * 100 / c->nbuckets; +} + static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) { uint8_t stale = 0; @@ -1651,9 +1657,8 @@ static void btree_gc_start(struct cache_set *c) mutex_unlock(&c->bucket_lock); } -static size_t bch_btree_gc_finish(struct cache_set *c) +static void bch_btree_gc_finish(struct cache_set *c) { - size_t available = 0; struct bucket *b; struct cache *ca; unsigned i; @@ -1690,6 +1695,7 @@ static size_t bch_btree_gc_finish(struct cache_set *c) } rcu_read_unlock(); + c->avail_nbuckets = 0; for_each_cache(ca, c, i) { uint64_t *i; @@ -1711,18 +1717,16 @@ static size_t bch_btree_gc_finish(struct cache_set *c) BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b)); if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE) - available++; + c->avail_nbuckets++; } } mutex_unlock(&c->bucket_lock); - return available; } static void bch_btree_gc(struct cache_set *c) { int ret; - unsigned long available; struct gc_stat stats; struct closure writes; struct btree_op op; @@ -1745,14 +1749,14 @@ static void bch_btree_gc(struct cache_set *c) pr_warn("gc failed!"); } while (ret); - available = bch_btree_gc_finish(c); + bch_btree_gc_finish(c); wake_up_allocators(c); bch_time_stats_update(&c->btree_gc_time, start_time); stats.key_bytes *= sizeof(uint64_t); stats.data <<= 9; - stats.in_use = (c->nbuckets - available) * 100 / c->nbuckets; + bch_update_bucket_in_use(c, &stats); memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat)); trace_bcache_gc_end(c); diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 73da1f5626cb..d211e2c25b6b 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_BTREE_H #define _BCACHE_BTREE_H @@ -305,5 +306,5 @@ void bch_keybuf_del(struct keybuf *, struct keybuf_key *); struct keybuf_key *bch_keybuf_next(struct keybuf *); struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *, struct bkey *, keybuf_pred_fn *); - +void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats); #endif diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 295b7e43f92c..ccfbea6f9f6b 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CLOSURE_H #define _LINUX_CLOSURE_H @@ -251,6 +252,12 @@ static inline void set_closure_fn(struct closure *cl, closure_fn *fn, static inline void closure_queue(struct closure *cl) { struct workqueue_struct *wq = cl->wq; + /** + * Changes made to closure, work_struct, or a couple of other structs + * may cause work.func not pointing to the right location. + */ + BUILD_BUG_ON(offsetof(struct closure, fn) + != offsetof(struct work_struct, func)); if (wq) { INIT_WORK(&cl->work, cl->work.func); BUG_ON(!queue_work(wq, &cl->work)); diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 61076eda2e6d..c7a02c4900da 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Assorted bcache debug code * diff --git a/drivers/md/bcache/debug.h b/drivers/md/bcache/debug.h index 1f63c195d247..acc48d3fa274 100644 --- a/drivers/md/bcache/debug.h +++ b/drivers/md/bcache/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_DEBUG_H #define _BCACHE_DEBUG_H diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 243de0bf15cd..41c238fc3733 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Kent Overstreet <kent.overstreet@gmail.com> * diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h index e2ed54054e7a..0cd3575afa1d 100644 --- a/drivers/md/bcache/extents.h +++ b/drivers/md/bcache/extents.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_EXTENTS_H #define _BCACHE_EXTENTS_H diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 7e871bdc0097..fac97ec2d0e2 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Some low level IO code, and hacks for various block layer limitations * diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 7e1d1c3ba33a..02a98ddb592d 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * bcache journalling code, for btree insertions * diff --git a/drivers/md/bcache/journal.h b/drivers/md/bcache/journal.h index e3c39457afbb..b5788199188f 100644 --- a/drivers/md/bcache/journal.h +++ b/drivers/md/bcache/journal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _BCACHE_JOURNAL_H #define _BCACHE_JOURNAL_H diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index f633b30c962e..d50c1c97da68 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Moving/copying garbage collector * diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 681b4f12b05a..3a7aed7282b2 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Main bcache entry point - handle a read or a write request and decide what to * do with it; the make_request functions are called by the block layer. @@ -26,12 +27,12 @@ struct kmem_cache *bch_search_cache; static void bch_data_insert_start(struct closure *); -static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) +static unsigned cache_mode(struct cached_dev *dc) { return BDEV_CACHE_MODE(&dc->sb); } -static bool verify(struct cached_dev *dc, struct bio *bio) +static bool verify(struct cached_dev *dc) { return dc->verify; } @@ -369,7 +370,7 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) { struct cache_set *c = dc->disk.c; - unsigned mode = cache_mode(dc, bio); + unsigned mode = cache_mode(dc); unsigned sectors, congested = bch_get_congested(c); struct task_struct *task = current; struct io *i; @@ -384,6 +385,14 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) op_is_write(bio_op(bio)))) goto skip; + /* + * Flag for bypass if the IO is for read-ahead or background, + * unless the read-ahead request is for metadata (eg, for gfs2). + */ + if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && + !(bio->bi_opf & REQ_META)) + goto skip; + if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { pr_debug("skipping unaligned io"); @@ -462,6 +471,7 @@ struct search { unsigned recoverable:1; unsigned write:1; unsigned read_dirty_data:1; + unsigned cache_missed:1; unsigned long start_time; @@ -648,6 +658,7 @@ static inline struct search *search_alloc(struct bio *bio, s->orig_bio = bio; s->cache_miss = NULL; + s->cache_missed = 0; s->d = d; s->recoverable = 1; s->write = op_is_write(bio_op(bio)); @@ -697,8 +708,16 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - if (s->recoverable) { + /* + * If cache device is dirty (dc->has_dirty is non-zero), then + * recovery a failed read request from cached device may get a + * stale data back. So read failure recovery is only permitted + * when cache device is clean. + */ + if (s->recoverable && + (dc && !atomic_read(&dc->has_dirty))) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); @@ -739,7 +758,7 @@ static void cached_dev_read_done(struct closure *cl) s->cache_miss = NULL; } - if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data) + if (verify(dc) && s->recoverable && !s->read_dirty_data) bch_data_verify(dc, s->orig_bio); bio_complete(s); @@ -759,12 +778,12 @@ static void cached_dev_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s->iop.c |