diff options
author | Shaohua Li <shli@fb.com> | 2016-12-13 12:40:15 -0800 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-12-13 12:40:15 -0800 |
commit | 20737738d397dfadbca1ea50dcc00d7259f500cf (patch) | |
tree | 5765b1815331bac9ca32208963c850e60806d6de /drivers/md | |
parent | b78b499a67c3f77aeb6cd0b54724bc38b141255d (diff) | |
parent | 2953079c692da067aeb6345659875b97378f9b0a (diff) |
Merge branch 'md-next' into md-linus
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 166 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 4 | ||||
-rw-r--r-- | drivers/md/linear.c | 31 | ||||
-rw-r--r-- | drivers/md/md.c | 701 | ||||
-rw-r--r-- | drivers/md/md.h | 108 | ||||
-rw-r--r-- | drivers/md/multipath.c | 92 | ||||
-rw-r--r-- | drivers/md/raid0.c | 107 | ||||
-rw-r--r-- | drivers/md/raid1.c | 247 | ||||
-rw-r--r-- | drivers/md/raid1.h | 19 | ||||
-rw-r--r-- | drivers/md/raid10.c | 295 | ||||
-rw-r--r-- | drivers/md/raid10.h | 2 | ||||
-rw-r--r-- | drivers/md/raid5-cache.c | 1833 | ||||
-rw-r--r-- | drivers/md/raid5.c | 623 | ||||
-rw-r--r-- | drivers/md/raid5.h | 172 |
14 files changed, 3168 insertions, 1232 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 2d826927a3bf..9fb2ccac958a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -27,6 +27,7 @@ #include <linux/mount.h> #include <linux/buffer_head.h> #include <linux/seq_file.h> +#include <trace/events/block.h> #include "md.h" #include "bitmap.h" @@ -208,11 +209,13 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) { - struct md_rdev *rdev = NULL; + struct md_rdev *rdev; struct block_device *bdev; struct mddev *mddev = bitmap->mddev; struct bitmap_storage *store = &bitmap->storage; +restart: + rdev = NULL; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { int size = PAGE_SIZE; loff_t offset = mddev->bitmap_info.offset; @@ -268,8 +271,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) page); } - if (wait) - md_super_wait(mddev); + if (wait && md_super_wait(mddev) < 0) + goto restart; return 0; bad_alignment: @@ -405,10 +408,10 @@ static int read_page(struct file *file, unsigned long index, ret = -EIO; out: if (ret) - printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n", - (int)PAGE_SIZE, - (unsigned long long)index << PAGE_SHIFT, - ret); + pr_err("md: bitmap read error: (%dB @ %llu): %d\n", + (int)PAGE_SIZE, + (unsigned long long)index << PAGE_SHIFT, + ret); return ret; } @@ -416,6 +419,28 @@ out: * bitmap file superblock operations */ +/* + * bitmap_wait_writes() should be called before writing any bitmap + * blocks, to ensure previous writes, particularly from + * bitmap_daemon_work(), have completed. + */ +static void bitmap_wait_writes(struct bitmap *bitmap) +{ + if (bitmap->storage.file) + wait_event(bitmap->write_wait, + atomic_read(&bitmap->pending_writes)==0); + else + /* Note that we ignore the return value. 
The writes + * might have failed, but that would just mean that + * some bits which should be cleared haven't been, + * which is safe. The relevant bitmap blocks will + * probably get written again, but there is no great + * loss if they aren't. + */ + md_super_wait(bitmap->mddev); +} + + /* update the event counter and sync the superblock to disk */ void bitmap_update_sb(struct bitmap *bitmap) { @@ -455,24 +480,24 @@ void bitmap_print_sb(struct bitmap *bitmap) if (!bitmap || !bitmap->storage.sb_page) return; sb = kmap_atomic(bitmap->storage.sb_page); - printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap)); - printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic)); - printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version)); - printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n", - *(__u32 *)(sb->uuid+0), - *(__u32 *)(sb->uuid+4), - *(__u32 *)(sb->uuid+8), - *(__u32 *)(sb->uuid+12)); - printk(KERN_DEBUG " events: %llu\n", - (unsigned long long) le64_to_cpu(sb->events)); - printk(KERN_DEBUG "events cleared: %llu\n", - (unsigned long long) le64_to_cpu(sb->events_cleared)); - printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state)); - printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize)); - printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); - printk(KERN_DEBUG " sync size: %llu KB\n", - (unsigned long long)le64_to_cpu(sb->sync_size)/2); - printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind)); + pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); + pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); + pr_debug(" version: %d\n", le32_to_cpu(sb->version)); + pr_debug(" uuid: %08x.%08x.%08x.%08x\n", + *(__u32 *)(sb->uuid+0), + *(__u32 *)(sb->uuid+4), + *(__u32 *)(sb->uuid+8), + *(__u32 *)(sb->uuid+12)); + pr_debug(" events: %llu\n", + (unsigned long long) le64_to_cpu(sb->events)); + pr_debug("events cleared: %llu\n", + (unsigned long long) le64_to_cpu(sb->events_cleared)); + pr_debug(" 
state: %08x\n", le32_to_cpu(sb->state)); + pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize)); + pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep)); + pr_debug(" sync size: %llu KB\n", + (unsigned long long)le64_to_cpu(sb->sync_size)/2); + pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } @@ -506,14 +531,14 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap) BUG_ON(!chunksize); if (!is_power_of_2(chunksize)) { kunmap_atomic(sb); - printk(KERN_ERR "bitmap chunksize not a power of 2\n"); + pr_warn("bitmap chunksize not a power of 2\n"); return -EINVAL; } sb->chunksize = cpu_to_le32(chunksize); daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { - printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n"); + pr_debug("Choosing daemon_sleep default (5 sec)\n"); daemon_sleep = 5 * HZ; } sb->daemon_sleep = cpu_to_le32(daemon_sleep); @@ -584,7 +609,7 @@ re_read: /* to 4k blocks */ bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3)); - pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, + pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, bitmap->cluster_slot, offset); } @@ -634,7 +659,7 @@ re_read: else if (write_behind > COUNTER_MAX) reason = "write-behind limit out of range (0 - 16383)"; if (reason) { - printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n", + pr_warn("%s: invalid bitmap file superblock: %s\n", bmname(bitmap), reason); goto out; } @@ -648,18 +673,15 @@ re_read: * bitmap's UUID and event counter to the mddev's */ if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { - printk(KERN_INFO - "%s: bitmap superblock UUID mismatch\n", - bmname(bitmap)); + pr_warn("%s: bitmap superblock UUID mismatch\n", + bmname(bitmap)); goto out; } events = le64_to_cpu(sb->events); if (!nodes && (events < bitmap->mddev->events)) { - 
printk(KERN_INFO - "%s: bitmap file is out of date (%llu < %llu) " - "-- forcing full recovery\n", - bmname(bitmap), events, - (unsigned long long) bitmap->mddev->events); + pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", + bmname(bitmap), events, + (unsigned long long) bitmap->mddev->events); set_bit(BITMAP_STALE, &bitmap->flags); } } @@ -679,8 +701,8 @@ out: if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { err = md_setup_cluster(bitmap->mddev, nodes); if (err) { - pr_err("%s: Could not setup cluster service (%d)\n", - bmname(bitmap), err); + pr_warn("%s: Could not setup cluster service (%d)\n", + bmname(bitmap), err); goto out_no_sb; } bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); @@ -847,15 +869,13 @@ static void bitmap_file_kick(struct bitmap *bitmap) ptr = file_path(bitmap->storage.file, path, PAGE_SIZE); - printk(KERN_ALERT - "%s: kicking failed bitmap file %s from array!\n", - bmname(bitmap), IS_ERR(ptr) ? "" : ptr); + pr_warn("%s: kicking failed bitmap file %s from array!\n", + bmname(bitmap), IS_ERR(ptr) ? 
"" : ptr); kfree(path); } else - printk(KERN_ALERT - "%s: disabling internal bitmap due to errors\n", - bmname(bitmap)); + pr_warn("%s: disabling internal bitmap due to errors\n", + bmname(bitmap)); } } @@ -983,6 +1003,7 @@ void bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; + int writing = 0; if (!bitmap || !bitmap->storage.filemap || test_bit(BITMAP_STALE, &bitmap->flags)) @@ -997,15 +1018,19 @@ void bitmap_unplug(struct bitmap *bitmap) need_write = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); if (dirty || need_write) { + if (!writing) { + bitmap_wait_writes(bitmap); + if (bitmap->mddev->queue) + blk_add_trace_msg(bitmap->mddev->queue, + "md bitmap_unplug"); + } clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); write_page(bitmap, bitmap->storage.filemap[i], 0); + writing = 1; } } - if (bitmap->storage.file) - wait_event(bitmap->write_wait, - atomic_read(&bitmap->pending_writes)==0); - else - md_super_wait(bitmap->mddev); + if (writing) + bitmap_wait_writes(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) bitmap_file_kick(bitmap); @@ -1056,14 +1081,13 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) outofdate = test_bit(BITMAP_STALE, &bitmap->flags); if (outofdate) - printk(KERN_INFO "%s: bitmap file is out of date, doing full " - "recovery\n", bmname(bitmap)); + pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap)); if (file && i_size_read(file->f_mapping->host) < store->bytes) { - printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", - bmname(bitmap), - (unsigned long) i_size_read(file->f_mapping->host), - store->bytes); + pr_warn("%s: bitmap file too short %lu < %lu\n", + bmname(bitmap), + (unsigned long) i_size_read(file->f_mapping->host), + store->bytes); goto err; } @@ -1137,16 +1161,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) offset = 0; } - printk(KERN_INFO "%s: bitmap initialized from disk: " - "read %lu pages, 
set %lu of %lu bits\n", - bmname(bitmap), store->file_pages, - bit_cnt, chunks); + pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", + bmname(bitmap), store->file_pages, + bit_cnt, chunks); return 0; err: - printk(KERN_INFO "%s: bitmap initialisation failed: %d\n", - bmname(bitmap), ret); + pr_warn("%s: bitmap initialisation failed: %d\n", + bmname(bitmap), ret); return ret; } @@ -1225,6 +1248,10 @@ void bitmap_daemon_work(struct mddev *mddev) } bitmap->allclean = 1; + if (bitmap->mddev->queue) + blk_add_trace_msg(bitmap->mddev->queue, + "md bitmap_daemon_work"); + /* Any file-page which is PENDING now needs to be written. * So set NEEDWRITE now, then after we make any last-minute changes * we will write it. @@ -1289,6 +1316,7 @@ void bitmap_daemon_work(struct mddev *mddev) } spin_unlock_irq(&counts->lock); + bitmap_wait_writes(bitmap); /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. * DIRTY pages need to be written by bitmap_unplug so it can wait * for them. @@ -1595,7 +1623,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force) atomic_read(&bitmap->mddev->recovery_active) == 0); bitmap->mddev->curr_resync_completed = sector; - set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); + set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { @@ -1825,8 +1853,8 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot) if (err) goto error; - printk(KERN_INFO "created bitmap (%lu pages) for device %s\n", - bitmap->counts.pages, bmname(bitmap)); + pr_debug("created bitmap (%lu pages) for device %s\n", + bitmap->counts.pages, bmname(bitmap)); err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; if (err) @@ -2029,8 +2057,10 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, !bitmap->mddev->bitmap_info.external, mddev_is_clustered(bitmap->mddev) ? 
bitmap->cluster_slot : 0); - if (ret) + if (ret) { + bitmap_file_unmap(&store); goto err; + } pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); @@ -2089,7 +2119,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks, bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; - pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n"); + pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; } else bitmap->counts.bp[page].count += 1; @@ -2266,7 +2296,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) /* Ensure new bitmap info is stored in * metadata promptly. */ - set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread); } rv = 0; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 6d53810963f7..953159d9a825 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev) sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190); /* Force writing of superblocks to disk */ - set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags); + set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags); /* Any superblock is better than none, choose that if given */ return refdev ? 
0 : 1; @@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs) struct mddev *mddev = &rs->md; int ro = mddev->ro; - set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); mddev->ro = 0; md_update_sb(mddev, 1); mddev->ro = ro; diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 86f5d435901d..5975c9915684 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -21,6 +21,7 @@ #include <linux/seq_file.h> #include <linux/module.h> #include <linux/slab.h> +#include <trace/events/block.h> #include "md.h" #include "linear.h" @@ -101,8 +102,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) sector_t sectors; if (j < 0 || j >= raid_disks || disk->rdev) { - printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n", - mdname(mddev)); + pr_warn("md/linear:%s: disk numbering problem. Aborting!\n", + mdname(mddev)); goto out; } @@ -123,8 +124,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks) discard_supported = true; } if (cnt != raid_disks) { - printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n", - mdname(mddev)); + pr_warn("md/linear:%s: not enough drives present. Aborting!\n", + mdname(mddev)); goto out; } @@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) } do { - tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector); + sector_t bio_sector = bio->bi_iter.bi_sector; + tmp_dev = which_dev(mddev, bio_sector); start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; end_sector = tmp_dev->end_sector; data_offset = tmp_dev->rdev->data_offset; bio->bi_bdev = tmp_dev->rdev->bdev; - if (unlikely(bio->bi_iter.bi_sector >= end_sector || - bio->bi_iter.bi_sector < start_sector)) + if (unlikely(bio_sector >= end_sector || + bio_sector < start_sector)) goto out_of_bounds; if (unlikely(bio_end_sector(bio) > end_sector)) { /* This bio crosses a device boundary, so we have to * split it. 
*/ - split = bio_split(bio, end_sector - - bio->bi_iter.bi_sector, + split = bio_split(bio, end_sector - bio_sector, GFP_NOIO, fs_bio_set); bio_chain(split, bio); } else { @@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio) !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) { /* Just ignore it */ bio_endio(split); - } else + } else { + if (mddev->gendisk) + trace_block_bio_remap(bdev_get_queue(split->bi_bdev), + split, disk_devt(mddev->gendisk), + bio_sector); generic_make_request(split); + } } while (split != bio); return; out_of_bounds: - printk(KERN_ERR - "md/linear:%s: make_request: Sector %llu out of bounds on " - "dev %s: %llu sectors, offset %llu\n", + pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n", mdname(mddev), (unsigned long long)bio->bi_iter.bi_sector, bdevname(tmp_dev->rdev->bdev, b), @@ -275,7 +279,6 @@ out_of_bounds: static void linear_status (struct seq_file *seq, struct mddev *mddev) { - seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2); } diff --git a/drivers/md/md.c b/drivers/md/md.c index f975cd08923d..82821ee0d57f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -30,6 +30,18 @@ You should have received a copy of the GNU General Public License (for example /usr/src/linux/COPYING); if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Errors, Warnings, etc. + Please use: + pr_crit() for error conditions that risk data loss + pr_err() for error conditions that are unexpected, like an IO error + or internal inconsistency + pr_warn() for error conditions that could have been predicted, like + adding a device to an array when it has incompatible metadata + pr_info() for very interesting, very rare events, like an array starting + or stopping, or resync starting or stopping + pr_debug() for everything else. 
+ */ #include <linux/kthread.h> @@ -52,6 +64,7 @@ #include <linux/raid/md_p.h> #include <linux/raid/md_u.h> #include <linux/slab.h> +#include <trace/events/block.h> #include "md.h" #include "bitmap.h" #include "md-cluster.h" @@ -684,11 +697,8 @@ static inline sector_t calc_dev_sboffset(struct md_rdev *rdev) static int alloc_disk_sb(struct md_rdev *rdev) { rdev->sb_page = alloc_page(GFP_KERNEL); - if (!rdev->sb_page) { - printk(KERN_ALERT "md: out of memory.\n"); + if (!rdev->sb_page) return -ENOMEM; - } - return 0; } @@ -715,9 +725,15 @@ static void super_written(struct bio *bio) struct mddev *mddev = rdev->mddev; if (bio->bi_error) { - printk("md: super_written gets error=%d\n", bio->bi_error); + pr_err("md: super_written gets error=%d\n", bio->bi_error); md_error(mddev, rdev); - } + if (!test_bit(Faulty, &rdev->flags) + && (bio->bi_opf & MD_FAILFAST)) { + set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags); + set_bit(LastDev, &rdev->flags); + } + } else + clear_bit(LastDev, &rdev->flags); if (atomic_dec_and_test(&mddev->pending_writes)) wake_up(&mddev->sb_wait); @@ -734,7 +750,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, * if zero is reached. 
* If an error occurred, call md_error */ - struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); + struct bio *bio; + int ff = 0; + + if (test_bit(Faulty, &rdev->flags)) + return; + + bio = bio_alloc_mddev(GFP_NOIO, 1, mddev); atomic_inc(&rdev->nr_pending); @@ -743,16 +765,24 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev, bio_add_page(bio, page, size, 0); bio->bi_private = rdev; bio->bi_end_io = super_written; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA; + + if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) && + test_bit(FailFast, &rdev->flags) && + !test_bit(LastDev, &rdev->flags)) + ff = MD_FAILFAST; + bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff; atomic_inc(&mddev->pending_writes); submit_bio(bio); } -void md_super_wait(struct mddev *mddev) +int md_super_wait(struct mddev *mddev) { /* wait for all superblock writes that were scheduled to complete */ wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); + if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags)) + return -EAGAIN; + return 0; } int sync_page_io(struct md_rdev *rdev, sector_t sector, int size, @@ -795,8 +825,8 @@ static int read_disk_sb(struct md_rdev *rdev, int size) return 0; fail: - printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n", - bdevname(rdev->bdev,b)); + pr_err("md: disabled device %s, could not read superblock.\n", + bdevname(rdev->bdev,b)); return -EINVAL; } @@ -818,7 +848,6 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) if (!tmp1 || !tmp2) { ret = 0; - printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n"); goto abort; } @@ -932,7 +961,7 @@ int md_check_no_bitmap(struct mddev *mddev) { if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset) return 0; - printk(KERN_ERR "%s: bitmaps are not supported for %s\n", + pr_warn("%s: bitmaps are not supported for %s\n", mdname(mddev), mddev->pers->name); return 1; } @@ -956,7 +985,8 @@ static int super_90_load(struct md_rdev *rdev, 
struct md_rdev *refdev, int minor rdev->sb_start = calc_dev_sboffset(rdev); ret = read_disk_sb(rdev, MD_SB_BYTES); - if (ret) return ret; + if (ret) + return ret; ret = -EINVAL; @@ -964,17 +994,15 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor sb = page_address(rdev->sb_page); if (sb->md_magic != MD_SB_MAGIC) { - printk(KERN_ERR "md: invalid raid superblock magic on %s\n", - b); + pr_warn("md: invalid raid superblock magic on %s\n", b); goto abort; } if (sb->major_version != 0 || sb->minor_version < 90 || sb->minor_version > 91) { - printk(KERN_WARNING "Bad version number %d.%d on %s\n", - sb->major_version, sb->minor_version, - b); + pr_warn("Bad version number %d.%d on %s\n", + sb->major_version, sb->minor_version, b); goto abort; } @@ -982,8 +1010,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor goto abort; if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) { - printk(KERN_WARNING "md: invalid superblock checksum on %s\n", - b); + pr_warn("md: invalid superblock checksum on %s\n", b); goto abort; } @@ -1004,14 +1031,13 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor __u64 ev1, ev2; mdp_super_t *refsb = page_address(refdev->sb_page); if (!uuid_equal(refsb, sb)) { - printk(KERN_WARNING "md: %s has different UUID to %s\n", + pr_warn("md: %s has different UUID to %s\n", b, bdevname(refdev->bdev,b2)); goto abort; } if (!sb_equal(refsb, sb)) { - printk(KERN_WARNING "md: %s has same UUID" - " but different superblock to %s\n", - b, bdevname(refdev->bdev, b2)); + pr_warn("md: %s has same UUID but different superblock to %s\n", + b, bdevname(refdev->bdev, b2)); goto abort; } ev1 = md_event(sb); @@ -1158,6 +1184,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) } if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); + if (desc->state & (1<<MD_DISK_FAILFAST)) + set_bit(FailFast, &rdev->flags); } 
else /* MULTIPATH are always insync */ set_bit(In_sync, &rdev->flags); return 0; @@ -1283,6 +1311,8 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) } if (test_bit(WriteMostly, &rdev2->flags)) d->state |= (1<<MD_DISK_WRITEMOSTLY); + if (test_bit(FailFast, &rdev2->flags)) + d->state |= (1<<MD_DISK_FAILFAST); } /* now set the "removed" and "faulty" bits on any missing devices */ for (i=0 ; i < mddev->raid_disks ; i++) { @@ -1324,9 +1354,10 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) && rdev->mddev->level >= 1) num_sectors = (sector_t)(2ULL << 32) - 2; - md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, + do { + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page); - md_super_wait(rdev->mddev); + } while (md_super_wait(rdev->mddev) < 0); return num_sectors; } @@ -1413,13 +1444,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ return -EINVAL; if (calc_sb_1_csum(sb) != sb->sb_csum) { - printk("md: invalid superblock checksum on %s\n", + pr_warn("md: invalid superblock checksum on %s\n", bdevname(rdev->bdev,b)); return -EINVAL; } if (le64_to_cpu(sb->data_size) < 10) { - printk("md: data_size too small on %s\n", - bdevname(rdev->bdev,b)); + pr_warn("md: data_size too small on %s\n", + bdevname(rdev->bdev,b)); return -EINVAL; } if (sb->pad0 || @@ -1503,8 +1534,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sb->level != refsb->level || sb->layout != refsb->layout || sb->chunksize != refsb->chunksize) { - printk(KERN_WARNING "md: %s has strangely different" - " superblock to %s\n", + pr_warn("md: %s has strangely different superblock to %s\n", bdevname(rdev->bdev,b), bdevname(refdev->bdev,b2)); return -EINVAL; @@ -1646,8 +1676,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) case MD_DISK_ROLE_JOURNAL: /* journal 
device */ if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) { /* journal device without journal feature */ - printk(KERN_WARNING - "md: journal device provided without journal feature, ignoring the device\n"); + pr_warn("md: journal device provided without journal feature, ignoring the device\n"); return -EINVAL; } set_bit(Journal, &rdev->flags); @@ -1669,6 +1698,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } if (sb->devflags & WriteMostly1) set_bit(WriteMostly, &rdev->flags); + if (sb->devflags & FailFast1) + set_bit(FailFast, &rdev->flags); if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) set_bit(Replacement, &rdev->flags); } else /* MULTIPATH are always insync */ @@ -1707,6 +1738,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + if (test_bit(FailFast, &rdev->flags)) + sb->devflags |= FailFast1; + else + sb->devflags &= ~FailFast1; if (test_bit(WriteMostly, &rdev->flags)) sb->devflags |= WriteMostly1; @@ -1863,9 +1898,10 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) sb->data_size = cpu_to_le64(num_sectors); sb->super_offset = rdev->sb_start; sb->sb_csum = calc_sb_1_csum(sb); - md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, - rdev->sb_page); - md_super_wait(rdev->mddev); + do { + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, + rdev->sb_page); + } while (md_super_wait(rdev->mddev) < 0); return num_sectors; } @@ -2004,9 +2040,9 @@ int md_integrity_register(struct mddev *mddev) blk_integrity_register(mddev->gendisk, bdev_get_integrity(reference->bdev)); - printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev)); + pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) { - printk(KERN_ERR "md: failed to create integrity 
pool for %s\n", + pr_err("md: failed to create integrity pool for %s\n", mdname(mddev)); return -EINVAL; } @@ -2034,8 +2070,8 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) return 0; if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) { - printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n", - mdname(mddev), bdevname(rdev->bdev, name)); + pr_err("%s: incompatible integrity profile for %s\n", + mdname(mddev), bdevname(rdev->bdev, name)); return -ENXIO; } @@ -2089,15 +2125,15 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) rcu_read_unlock(); if (!test_bit(Journal, &rdev->flags) && mddev->max_disks && rdev->desc_nr >= mddev->max_disks) { - printk(KERN_WARNING "md: %s: array is limited to %d devices\n", - mdname(mddev), mddev->max_disks); + pr_warn("md: %s: array is limited to %d devices\n", + mdname(mddev), mddev->max_disks); return -EBUSY; } bdevname(rdev->bdev,b); strreplace(b, '/', '!'); rdev->mddev = mddev; - printk(KERN_INFO "md: bind<%s>\n", b); + pr_debug("md: bind<%s>\n", b); if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; @@ -2116,8 +2152,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) return 0; fail: - printk(KERN_WARNING "md: failed to register dev-%s for %s\n", - b, mdname(mddev)); + pr_warn("md: failed to register dev-%s for %s\n", + b, mdname(mddev)); return err; } @@ -2134,7 +2170,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk); list_del_rcu(&rdev->same_set); - printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); + pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); @@ -2164,8 +2200,7 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared) bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, shared ? 
(struct md_rdev *)lock_rdev : rdev); if (IS_ERR(bdev)) { - printk(KERN_ERR "md: could not open %s.\n", - __bdevname(dev, b)); + pr_warn("md: could not open %s.\n", __bdevname(dev, b)); return PTR_ERR(bdev); } rdev->bdev = bdev; @@ -2185,8 +2220,7 @@ static void export_rdev(struct md_rdev *rdev) { char b[BDEVNAME_SIZE]; - printk(KERN_INFO "md: export_rdev(%s)\n", - bdevname(rdev->bdev,b)); + pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b)); md_rdev_clear(rdev); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) @@ -2288,24 +2322,24 @@ void md_update_sb(struct mddev *mddev, int force_change) if (mddev->ro) { if (force_change) - set_bit(MD_CHANGE_DEVS, &mddev->flags); + set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); return; } repeat: if (mddev_is_clustered(mddev)) { - if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) + if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) force_change = 1; - if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags)) nospares = 1; ret = md_cluster_ops->metadata_update_start(mddev); /* Has someone else has updated the sb */ if (!does_sb_need_changing(mddev)) { if (ret == 0) md_cluster_ops->metadata_update_cancel(mddev); - bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING), - BIT(MD_CHANGE_DEVS) | - BIT(MD_CHANGE_CLEAN)); + bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING), + BIT(MD_SB_CHANGE_DEVS) | + BIT(MD_SB_CHANGE_CLEAN)); return; } } @@ -2321,10 +2355,10 @@ repeat: } if (!mddev->persistent) { - clear_bit(MD_CHANGE_CLEAN, &mddev->flags); - clear_bit(MD_CHANGE_DEVS, &mddev->flags); + clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags); + clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); if (!mddev->external) { - clear_bit(MD_CHANGE_PENDING, &mddev->flags); + clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags); rdev_for_each(rdev, mddev) { if (rdev->badblocks.changed) { rdev->badblocks.changed = 0; @@ -2344,9 +2378,9 @@ repeat: mddev->utime = 
ktime_get_real_seconds(); - if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) + if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags)) force_change = 1; - if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags)) + if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags)) |