summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2016-12-13 12:40:15 -0800
committerShaohua Li <shli@fb.com>2016-12-13 12:40:15 -0800
commit20737738d397dfadbca1ea50dcc00d7259f500cf (patch)
tree5765b1815331bac9ca32208963c850e60806d6de /drivers/md
parentb78b499a67c3f77aeb6cd0b54724bc38b141255d (diff)
parent2953079c692da067aeb6345659875b97378f9b0a (diff)
Merge branch 'md-next' into md-linus
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c166
-rw-r--r--drivers/md/dm-raid.c4
-rw-r--r--drivers/md/linear.c31
-rw-r--r--drivers/md/md.c701
-rw-r--r--drivers/md/md.h108
-rw-r--r--drivers/md/multipath.c92
-rw-r--r--drivers/md/raid0.c107
-rw-r--r--drivers/md/raid1.c247
-rw-r--r--drivers/md/raid1.h19
-rw-r--r--drivers/md/raid10.c295
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5-cache.c1833
-rw-r--r--drivers/md/raid5.c623
-rw-r--r--drivers/md/raid5.h172
14 files changed, 3168 insertions, 1232 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 2d826927a3bf..9fb2ccac958a 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -27,6 +27,7 @@
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/seq_file.h>
+#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
@@ -208,11 +209,13 @@ static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mdde
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
- struct md_rdev *rdev = NULL;
+ struct md_rdev *rdev;
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
+restart:
+ rdev = NULL;
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE;
loff_t offset = mddev->bitmap_info.offset;
@@ -268,8 +271,8 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
page);
}
- if (wait)
- md_super_wait(mddev);
+ if (wait && md_super_wait(mddev) < 0)
+ goto restart;
return 0;
bad_alignment:
@@ -405,10 +408,10 @@ static int read_page(struct file *file, unsigned long index,
ret = -EIO;
out:
if (ret)
- printk(KERN_ALERT "md: bitmap read error: (%dB @ %llu): %d\n",
- (int)PAGE_SIZE,
- (unsigned long long)index << PAGE_SHIFT,
- ret);
+ pr_err("md: bitmap read error: (%dB @ %llu): %d\n",
+ (int)PAGE_SIZE,
+ (unsigned long long)index << PAGE_SHIFT,
+ ret);
return ret;
}
@@ -416,6 +419,28 @@ out:
* bitmap file superblock operations
*/
+/*
+ * bitmap_wait_writes() should be called before writing any bitmap
+ * blocks, to ensure previous writes, particularly from
+ * bitmap_daemon_work(), have completed.
+ */
+static void bitmap_wait_writes(struct bitmap *bitmap)
+{
+ if (bitmap->storage.file)
+ wait_event(bitmap->write_wait,
+ atomic_read(&bitmap->pending_writes)==0);
+ else
+ /* Note that we ignore the return value. The writes
+ * might have failed, but that would just mean that
+ * some bits which should be cleared haven't been,
+ * which is safe. The relevant bitmap blocks will
+ * probably get written again, but there is no great
+ * loss if they aren't.
+ */
+ md_super_wait(bitmap->mddev);
+}
+
+
/* update the event counter and sync the superblock to disk */
void bitmap_update_sb(struct bitmap *bitmap)
{
@@ -455,24 +480,24 @@ void bitmap_print_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->storage.sb_page)
return;
sb = kmap_atomic(bitmap->storage.sb_page);
- printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
- printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
- printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
- printk(KERN_DEBUG " uuid: %08x.%08x.%08x.%08x\n",
- *(__u32 *)(sb->uuid+0),
- *(__u32 *)(sb->uuid+4),
- *(__u32 *)(sb->uuid+8),
- *(__u32 *)(sb->uuid+12));
- printk(KERN_DEBUG " events: %llu\n",
- (unsigned long long) le64_to_cpu(sb->events));
- printk(KERN_DEBUG "events cleared: %llu\n",
- (unsigned long long) le64_to_cpu(sb->events_cleared));
- printk(KERN_DEBUG " state: %08x\n", le32_to_cpu(sb->state));
- printk(KERN_DEBUG " chunksize: %d B\n", le32_to_cpu(sb->chunksize));
- printk(KERN_DEBUG " daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
- printk(KERN_DEBUG " sync size: %llu KB\n",
- (unsigned long long)le64_to_cpu(sb->sync_size)/2);
- printk(KERN_DEBUG "max write behind: %d\n", le32_to_cpu(sb->write_behind));
+ pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
+ pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
+ pr_debug(" version: %d\n", le32_to_cpu(sb->version));
+ pr_debug(" uuid: %08x.%08x.%08x.%08x\n",
+ *(__u32 *)(sb->uuid+0),
+ *(__u32 *)(sb->uuid+4),
+ *(__u32 *)(sb->uuid+8),
+ *(__u32 *)(sb->uuid+12));
+ pr_debug(" events: %llu\n",
+ (unsigned long long) le64_to_cpu(sb->events));
+ pr_debug("events cleared: %llu\n",
+ (unsigned long long) le64_to_cpu(sb->events_cleared));
+ pr_debug(" state: %08x\n", le32_to_cpu(sb->state));
+ pr_debug(" chunksize: %d B\n", le32_to_cpu(sb->chunksize));
+ pr_debug(" daemon sleep: %ds\n", le32_to_cpu(sb->daemon_sleep));
+ pr_debug(" sync size: %llu KB\n",
+ (unsigned long long)le64_to_cpu(sb->sync_size)/2);
+ pr_debug("max write behind: %d\n", le32_to_cpu(sb->write_behind));
kunmap_atomic(sb);
}
@@ -506,14 +531,14 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
BUG_ON(!chunksize);
if (!is_power_of_2(chunksize)) {
kunmap_atomic(sb);
- printk(KERN_ERR "bitmap chunksize not a power of 2\n");
+ pr_warn("bitmap chunksize not a power of 2\n");
return -EINVAL;
}
sb->chunksize = cpu_to_le32(chunksize);
daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
- printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
+ pr_debug("Choosing daemon_sleep default (5 sec)\n");
daemon_sleep = 5 * HZ;
}
sb->daemon_sleep = cpu_to_le32(daemon_sleep);
@@ -584,7 +609,7 @@ re_read:
/* to 4k blocks */
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
- pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
+ pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
bitmap->cluster_slot, offset);
}
@@ -634,7 +659,7 @@ re_read:
else if (write_behind > COUNTER_MAX)
reason = "write-behind limit out of range (0 - 16383)";
if (reason) {
- printk(KERN_INFO "%s: invalid bitmap file superblock: %s\n",
+ pr_warn("%s: invalid bitmap file superblock: %s\n",
bmname(bitmap), reason);
goto out;
}
@@ -648,18 +673,15 @@ re_read:
* bitmap's UUID and event counter to the mddev's
*/
if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) {
- printk(KERN_INFO
- "%s: bitmap superblock UUID mismatch\n",
- bmname(bitmap));
+ pr_warn("%s: bitmap superblock UUID mismatch\n",
+ bmname(bitmap));
goto out;
}
events = le64_to_cpu(sb->events);
if (!nodes && (events < bitmap->mddev->events)) {
- printk(KERN_INFO
- "%s: bitmap file is out of date (%llu < %llu) "
- "-- forcing full recovery\n",
- bmname(bitmap), events,
- (unsigned long long) bitmap->mddev->events);
+ pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n",
+ bmname(bitmap), events,
+ (unsigned long long) bitmap->mddev->events);
set_bit(BITMAP_STALE, &bitmap->flags);
}
}
@@ -679,8 +701,8 @@ out:
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
- pr_err("%s: Could not setup cluster service (%d)\n",
- bmname(bitmap), err);
+ pr_warn("%s: Could not setup cluster service (%d)\n",
+ bmname(bitmap), err);
goto out_no_sb;
}
bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev);
@@ -847,15 +869,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
ptr = file_path(bitmap->storage.file,
path, PAGE_SIZE);
- printk(KERN_ALERT
- "%s: kicking failed bitmap file %s from array!\n",
- bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
+ pr_warn("%s: kicking failed bitmap file %s from array!\n",
+ bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
kfree(path);
} else
- printk(KERN_ALERT
- "%s: disabling internal bitmap due to errors\n",
- bmname(bitmap));
+ pr_warn("%s: disabling internal bitmap due to errors\n",
+ bmname(bitmap));
}
}
@@ -983,6 +1003,7 @@ void bitmap_unplug(struct bitmap *bitmap)
{
unsigned long i;
int dirty, need_write;
+ int writing = 0;
if (!bitmap || !bitmap->storage.filemap ||
test_bit(BITMAP_STALE, &bitmap->flags))
@@ -997,15 +1018,19 @@ void bitmap_unplug(struct bitmap *bitmap)
need_write = test_and_clear_page_attr(bitmap, i,
BITMAP_PAGE_NEEDWRITE);
if (dirty || need_write) {
+ if (!writing) {
+ bitmap_wait_writes(bitmap);
+ if (bitmap->mddev->queue)
+ blk_add_trace_msg(bitmap->mddev->queue,
+ "md bitmap_unplug");
+ }
clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING);
write_page(bitmap, bitmap->storage.filemap[i], 0);
+ writing = 1;
}
}
- if (bitmap->storage.file)
- wait_event(bitmap->write_wait,
- atomic_read(&bitmap->pending_writes)==0);
- else
- md_super_wait(bitmap->mddev);
+ if (writing)
+ bitmap_wait_writes(bitmap);
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))
bitmap_file_kick(bitmap);
@@ -1056,14 +1081,13 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
outofdate = test_bit(BITMAP_STALE, &bitmap->flags);
if (outofdate)
- printk(KERN_INFO "%s: bitmap file is out of date, doing full "
- "recovery\n", bmname(bitmap));
+ pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap));
if (file && i_size_read(file->f_mapping->host) < store->bytes) {
- printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
- bmname(bitmap),
- (unsigned long) i_size_read(file->f_mapping->host),
- store->bytes);
+ pr_warn("%s: bitmap file too short %lu < %lu\n",
+ bmname(bitmap),
+ (unsigned long) i_size_read(file->f_mapping->host),
+ store->bytes);
goto err;
}
@@ -1137,16 +1161,15 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
offset = 0;
}
- printk(KERN_INFO "%s: bitmap initialized from disk: "
- "read %lu pages, set %lu of %lu bits\n",
- bmname(bitmap), store->file_pages,
- bit_cnt, chunks);
+ pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n",
+ bmname(bitmap), store->file_pages,
+ bit_cnt, chunks);
return 0;
err:
- printk(KERN_INFO "%s: bitmap initialisation failed: %d\n",
- bmname(bitmap), ret);
+ pr_warn("%s: bitmap initialisation failed: %d\n",
+ bmname(bitmap), ret);
return ret;
}
@@ -1225,6 +1248,10 @@ void bitmap_daemon_work(struct mddev *mddev)
}
bitmap->allclean = 1;
+ if (bitmap->mddev->queue)
+ blk_add_trace_msg(bitmap->mddev->queue,
+ "md bitmap_daemon_work");
+
/* Any file-page which is PENDING now needs to be written.
* So set NEEDWRITE now, then after we make any last-minute changes
* we will write it.
@@ -1289,6 +1316,7 @@ void bitmap_daemon_work(struct mddev *mddev)
}
spin_unlock_irq(&counts->lock);
+ bitmap_wait_writes(bitmap);
/* Now start writeout on any page in NEEDWRITE that isn't DIRTY.
* DIRTY pages need to be written by bitmap_unplug so it can wait
* for them.
@@ -1595,7 +1623,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector, bool force)
atomic_read(&bitmap->mddev->recovery_active) == 0);
bitmap->mddev->curr_resync_completed = sector;
- set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
+ set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags);
sector &= ~((1ULL << bitmap->counts.chunkshift) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
@@ -1825,8 +1853,8 @@ struct bitmap *bitmap_create(struct mddev *mddev, int slot)
if (err)
goto error;
- printk(KERN_INFO "created bitmap (%lu pages) for device %s\n",
- bitmap->counts.pages, bmname(bitmap));
+ pr_debug("created bitmap (%lu pages) for device %s\n",
+ bitmap->counts.pages, bmname(bitmap));
err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0;
if (err)
@@ -2029,8 +2057,10 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
!bitmap->mddev->bitmap_info.external,
mddev_is_clustered(bitmap->mddev)
? bitmap->cluster_slot : 0);
- if (ret)
+ if (ret) {
+ bitmap_file_unmap(&store);
goto err;
+ }
pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO);
@@ -2089,7 +2119,7 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
bitmap->mddev->bitmap_info.chunksize = 1 << (old_counts.chunkshift +
BITMAP_BLOCK_SHIFT);
blocks = old_counts.chunks << old_counts.chunkshift;
- pr_err("Could not pre-allocate in-memory bitmap for cluster raid\n");
+ pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n");
break;
} else
bitmap->counts.bp[page].count += 1;
@@ -2266,7 +2296,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
/* Ensure new bitmap info is stored in
* metadata promptly.
*/
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
md_wakeup_thread(mddev->thread);
}
rv = 0;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 6d53810963f7..953159d9a825 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);
/* Force writing of superblocks to disk */
- set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);
/* Any superblock is better than none, choose that if given */
return refdev ? 0 : 1;
@@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs)
struct mddev *mddev = &rs->md;
int ro = mddev->ro;
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
mddev->ro = 0;
md_update_sb(mddev, 1);
mddev->ro = ro;
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 86f5d435901d..5975c9915684 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "linear.h"
@@ -101,8 +102,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
sector_t sectors;
if (j < 0 || j >= raid_disks || disk->rdev) {
- printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n",
- mdname(mddev));
+ pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
+ mdname(mddev));
goto out;
}
@@ -123,8 +124,8 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
discard_supported = true;
}
if (cnt != raid_disks) {
- printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
- mdname(mddev));
+ pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
+ mdname(mddev));
goto out;
}
@@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
}
do {
- tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
+ sector_t bio_sector = bio->bi_iter.bi_sector;
+ tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
bio->bi_bdev = tmp_dev->rdev->bdev;
- if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
- bio->bi_iter.bi_sector < start_sector))
+ if (unlikely(bio_sector >= end_sector ||
+ bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
- split = bio_split(bio, end_sector -
- bio->bi_iter.bi_sector,
+ split = bio_split(bio, end_sector - bio_sector,
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
@@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
- } else
+ } else {
+ if (mddev->gendisk)
+ trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
+ split, disk_devt(mddev->gendisk),
+ bio_sector);
generic_make_request(split);
+ }
} while (split != bio);
return;
out_of_bounds:
- printk(KERN_ERR
- "md/linear:%s: make_request: Sector %llu out of bounds on "
- "dev %s: %llu sectors, offset %llu\n",
+ pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
mdname(mddev),
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
@@ -275,7 +279,6 @@ out_of_bounds:
static void linear_status (struct seq_file *seq, struct mddev *mddev)
{
-
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f975cd08923d..82821ee0d57f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -30,6 +30,18 @@
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Errors, Warnings, etc.
+ Please use:
+ pr_crit() for error conditions that risk data loss
+ pr_err() for error conditions that are unexpected, like an IO error
+ or internal inconsistency
+ pr_warn() for error conditions that could have been predicated, like
+ adding a device to an array when it has incompatible metadata
+ pr_info() for every interesting, very rare events, like an array starting
+ or stopping, or resync starting or stopping
+ pr_debug() for everything else.
+
*/
#include <linux/kthread.h>
@@ -52,6 +64,7 @@
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"
@@ -684,11 +697,8 @@ static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
static int alloc_disk_sb(struct md_rdev *rdev)
{
rdev->sb_page = alloc_page(GFP_KERNEL);
- if (!rdev->sb_page) {
- printk(KERN_ALERT "md: out of memory.\n");
+ if (!rdev->sb_page)
return -ENOMEM;
- }
-
return 0;
}
@@ -715,9 +725,15 @@ static void super_written(struct bio *bio)
struct mddev *mddev = rdev->mddev;
if (bio->bi_error) {
- printk("md: super_written gets error=%d\n", bio->bi_error);
+ pr_err("md: super_written gets error=%d\n", bio->bi_error);
md_error(mddev, rdev);
- }
+ if (!test_bit(Faulty, &rdev->flags)
+ && (bio->bi_opf & MD_FAILFAST)) {
+ set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
+ set_bit(LastDev, &rdev->flags);
+ }
+ } else
+ clear_bit(LastDev, &rdev->flags);
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
@@ -734,7 +750,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
* if zero is reached.
* If an error occurred, call md_error
*/
- struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
+ struct bio *bio;
+ int ff = 0;
+
+ if (test_bit(Faulty, &rdev->flags))
+ return;
+
+ bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
atomic_inc(&rdev->nr_pending);
@@ -743,16 +765,24 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA;
+
+ if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
+ test_bit(FailFast, &rdev->flags) &&
+ !test_bit(LastDev, &rdev->flags))
+ ff = MD_FAILFAST;
+ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;
atomic_inc(&mddev->pending_writes);
submit_bio(bio);
}
-void md_super_wait(struct mddev *mddev)
+int md_super_wait(struct mddev *mddev)
{
/* wait for all superblock writes that were scheduled to complete */
wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+ if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
+ return -EAGAIN;
+ return 0;
}
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
@@ -795,8 +825,8 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
return 0;
fail:
- printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
- bdevname(rdev->bdev,b));
+ pr_err("md: disabled device %s, could not read superblock.\n",
+ bdevname(rdev->bdev,b));
return -EINVAL;
}
@@ -818,7 +848,6 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
if (!tmp1 || !tmp2) {
ret = 0;
- printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
goto abort;
}
@@ -932,7 +961,7 @@ int md_check_no_bitmap(struct mddev *mddev)
{
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
- printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
+ pr_warn("%s: bitmaps are not supported for %s\n",
mdname(mddev), mddev->pers->name);
return 1;
}
@@ -956,7 +985,8 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
rdev->sb_start = calc_dev_sboffset(rdev);
ret = read_disk_sb(rdev, MD_SB_BYTES);
- if (ret) return ret;
+ if (ret)
+ return ret;
ret = -EINVAL;
@@ -964,17 +994,15 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
sb = page_address(rdev->sb_page);
if (sb->md_magic != MD_SB_MAGIC) {
- printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
- b);
+ pr_warn("md: invalid raid superblock magic on %s\n", b);
goto abort;
}
if (sb->major_version != 0 ||
sb->minor_version < 90 ||
sb->minor_version > 91) {
- printk(KERN_WARNING "Bad version number %d.%d on %s\n",
- sb->major_version, sb->minor_version,
- b);
+ pr_warn("Bad version number %d.%d on %s\n",
+ sb->major_version, sb->minor_version, b);
goto abort;
}
@@ -982,8 +1010,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
goto abort;
if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
- printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
- b);
+ pr_warn("md: invalid superblock checksum on %s\n", b);
goto abort;
}
@@ -1004,14 +1031,13 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
- printk(KERN_WARNING "md: %s has different UUID to %s\n",
+ pr_warn("md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
goto abort;
}
if (!sb_equal(refsb, sb)) {
- printk(KERN_WARNING "md: %s has same UUID"
- " but different superblock to %s\n",
- b, bdevname(refdev->bdev, b2));
+ pr_warn("md: %s has same UUID but different superblock to %s\n",
+ b, bdevname(refdev->bdev, b2));
goto abort;
}
ev1 = md_event(sb);
@@ -1158,6 +1184,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
}
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
+ if (desc->state & (1<<MD_DISK_FAILFAST))
+ set_bit(FailFast, &rdev->flags);
} else /* MULTIPATH are always insync */
set_bit(In_sync, &rdev->flags);
return 0;
@@ -1283,6 +1311,8 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
}
if (test_bit(WriteMostly, &rdev2->flags))
d->state |= (1<<MD_DISK_WRITEMOSTLY);
+ if (test_bit(FailFast, &rdev2->flags))
+ d->state |= (1<<MD_DISK_FAILFAST);
}
/* now set the "removed" and "faulty" bits on any missing devices */
for (i=0 ; i < mddev->raid_disks ; i++) {
@@ -1324,9 +1354,10 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
rdev->mddev->level >= 1)
num_sectors = (sector_t)(2ULL << 32) - 2;
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
- md_super_wait(rdev->mddev);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
@@ -1413,13 +1444,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return -EINVAL;
if (calc_sb_1_csum(sb) != sb->sb_csum) {
- printk("md: invalid superblock checksum on %s\n",
+ pr_warn("md: invalid superblock checksum on %s\n",
bdevname(rdev->bdev,b));
return -EINVAL;
}
if (le64_to_cpu(sb->data_size) < 10) {
- printk("md: data_size too small on %s\n",
- bdevname(rdev->bdev,b));
+ pr_warn("md: data_size too small on %s\n",
+ bdevname(rdev->bdev,b));
return -EINVAL;
}
if (sb->pad0 ||
@@ -1503,8 +1534,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sb->level != refsb->level ||
sb->layout != refsb->layout ||
sb->chunksize != refsb->chunksize) {
- printk(KERN_WARNING "md: %s has strangely different"
- " superblock to %s\n",
+ pr_warn("md: %s has strangely different superblock to %s\n",
bdevname(rdev->bdev,b),
bdevname(refdev->bdev,b2));
return -EINVAL;
@@ -1646,8 +1676,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
case MD_DISK_ROLE_JOURNAL: /* journal device */
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
/* journal device without journal feature */
- printk(KERN_WARNING
- "md: journal device provided without journal feature, ignoring the device\n");
+ pr_warn("md: journal device provided without journal feature, ignoring the device\n");
return -EINVAL;
}
set_bit(Journal, &rdev->flags);
@@ -1669,6 +1698,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
}
if (sb->devflags & WriteMostly1)
set_bit(WriteMostly, &rdev->flags);
+ if (sb->devflags & FailFast1)
+ set_bit(FailFast, &rdev->flags);
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
set_bit(Replacement, &rdev->flags);
} else /* MULTIPATH are always insync */
@@ -1707,6 +1738,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
+ if (test_bit(FailFast, &rdev->flags))
+ sb->devflags |= FailFast1;
+ else
+ sb->devflags &= ~FailFast1;
if (test_bit(WriteMostly, &rdev->flags))
sb->devflags |= WriteMostly1;
@@ -1863,9 +1898,10 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->sb_csum = calc_sb_1_csum(sb);
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
- rdev->sb_page);
- md_super_wait(rdev->mddev);
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ rdev->sb_page);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
@@ -2004,9 +2040,9 @@ int md_integrity_register(struct mddev *mddev)
blk_integrity_register(mddev->gendisk,
bdev_get_integrity(reference->bdev));
- printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
+ pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
- printk(KERN_ERR "md: failed to create integrity pool for %s\n",
+ pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
}
@@ -2034,8 +2070,8 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
return 0;
if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
- printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
- mdname(mddev), bdevname(rdev->bdev, name));
+ pr_err("%s: incompatible integrity profile for %s\n",
+ mdname(mddev), bdevname(rdev->bdev, name));
return -ENXIO;
}
@@ -2089,15 +2125,15 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
rcu_read_unlock();
if (!test_bit(Journal, &rdev->flags) &&
mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
- printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
- mdname(mddev), mddev->max_disks);
+ pr_warn("md: %s: array is limited to %d devices\n",
+ mdname(mddev), mddev->max_disks);
return -EBUSY;
}
bdevname(rdev->bdev,b);
strreplace(b, '/', '!');
rdev->mddev = mddev;
- printk(KERN_INFO "md: bind<%s>\n", b);
+ pr_debug("md: bind<%s>\n", b);
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
@@ -2116,8 +2152,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return 0;
fail:
- printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
- b, mdname(mddev));
+ pr_warn("md: failed to register dev-%s for %s\n",
+ b, mdname(mddev));
return err;
}
@@ -2134,7 +2170,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
- printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
+ pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
@@ -2164,8 +2200,7 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
- printk(KERN_ERR "md: could not open %s.\n",
- __bdevname(dev, b));
+ pr_warn("md: could not open %s.\n", __bdevname(dev, b));
return PTR_ERR(bdev);
}
rdev->bdev = bdev;
@@ -2185,8 +2220,7 @@ static void export_rdev(struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
- printk(KERN_INFO "md: export_rdev(%s)\n",
- bdevname(rdev->bdev,b));
+ pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
md_rdev_clear(rdev);
#ifndef MODULE
if (test_bit(AutoDetected, &rdev->flags))
@@ -2288,24 +2322,24 @@ void md_update_sb(struct mddev *mddev, int force_change)
if (mddev->ro) {
if (force_change)
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
}
repeat:
if (mddev_is_clustered(mddev)) {
- if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
- if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
nospares = 1;
ret = md_cluster_ops->metadata_update_start(mddev);
/* Has someone else has updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
md_cluster_ops->metadata_update_cancel(mddev);
- bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
- BIT(MD_CHANGE_DEVS) |
- BIT(MD_CHANGE_CLEAN));
+ bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
+ BIT(MD_SB_CHANGE_DEVS) |
+ BIT(MD_SB_CHANGE_CLEAN));
return;
}
}
@@ -2321,10 +2355,10 @@ repeat:
}
if (!mddev->persistent) {
- clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
- clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
+ clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (!mddev->external) {
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
rdev_for_each(rdev, mddev) {
if (rdev->badblocks.changed) {
rdev->badblocks.changed = 0;
@@ -2344,9 +2378,9 @@ repeat:
mddev->utime = ktime_get_real_seconds();
- if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
- if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))