summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
author: Shaohua Li <shli@fb.com> 2016-12-13 12:40:15 -0800
committer: Shaohua Li <shli@fb.com> 2016-12-13 12:40:15 -0800
commit: 20737738d397dfadbca1ea50dcc00d7259f500cf (patch)
tree: 5765b1815331bac9ca32208963c850e60806d6de /drivers/md/md.c
parent: b78b499a67c3f77aeb6cd0b54724bc38b141255d (diff)
parent: 2953079c692da067aeb6345659875b97378f9b0a (diff)
Merge branch 'md-next' into md-linus
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- drivers/md/md.c 701
1 files changed, 364 insertions, 337 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f975cd08923d..82821ee0d57f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -30,6 +30,18 @@
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Errors, Warnings, etc.
+ Please use:
+ pr_crit() for error conditions that risk data loss
+ pr_err() for error conditions that are unexpected, like an IO error
+ or internal inconsistency
+ pr_warn() for error conditions that could have been predicted, like
+ adding a device to an array when it has incompatible metadata
+ pr_info() for very interesting, very rare events, like an array starting
+ or stopping, or resync starting or stopping
+ pr_debug() for everything else.
+
*/
#include <linux/kthread.h>
@@ -52,6 +64,7 @@
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
+#include <trace/events/block.h>
#include "md.h"
#include "bitmap.h"
#include "md-cluster.h"
@@ -684,11 +697,8 @@ static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
static int alloc_disk_sb(struct md_rdev *rdev)
{
rdev->sb_page = alloc_page(GFP_KERNEL);
- if (!rdev->sb_page) {
- printk(KERN_ALERT "md: out of memory.\n");
+ if (!rdev->sb_page)
return -ENOMEM;
- }
-
return 0;
}
@@ -715,9 +725,15 @@ static void super_written(struct bio *bio)
struct mddev *mddev = rdev->mddev;
if (bio->bi_error) {
- printk("md: super_written gets error=%d\n", bio->bi_error);
+ pr_err("md: super_written gets error=%d\n", bio->bi_error);
md_error(mddev, rdev);
- }
+ if (!test_bit(Faulty, &rdev->flags)
+ && (bio->bi_opf & MD_FAILFAST)) {
+ set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
+ set_bit(LastDev, &rdev->flags);
+ }
+ } else
+ clear_bit(LastDev, &rdev->flags);
if (atomic_dec_and_test(&mddev->pending_writes))
wake_up(&mddev->sb_wait);
@@ -734,7 +750,13 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
* if zero is reached.
* If an error occurred, call md_error
*/
- struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
+ struct bio *bio;
+ int ff = 0;
+
+ if (test_bit(Faulty, &rdev->flags))
+ return;
+
+ bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
atomic_inc(&rdev->nr_pending);
@@ -743,16 +765,24 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
bio_add_page(bio, page, size, 0);
bio->bi_private = rdev;
bio->bi_end_io = super_written;
- bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA;
+
+ if (test_bit(MD_FAILFAST_SUPPORTED, &mddev->flags) &&
+ test_bit(FailFast, &rdev->flags) &&
+ !test_bit(LastDev, &rdev->flags))
+ ff = MD_FAILFAST;
+ bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_FUA | ff;
atomic_inc(&mddev->pending_writes);
submit_bio(bio);
}
-void md_super_wait(struct mddev *mddev)
+int md_super_wait(struct mddev *mddev)
{
/* wait for all superblock writes that were scheduled to complete */
wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
+ if (test_and_clear_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags))
+ return -EAGAIN;
+ return 0;
}
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
@@ -795,8 +825,8 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
return 0;
fail:
- printk(KERN_WARNING "md: disabled device %s, could not read superblock.\n",
- bdevname(rdev->bdev,b));
+ pr_err("md: disabled device %s, could not read superblock.\n",
+ bdevname(rdev->bdev,b));
return -EINVAL;
}
@@ -818,7 +848,6 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
if (!tmp1 || !tmp2) {
ret = 0;
- printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n");
goto abort;
}
@@ -932,7 +961,7 @@ int md_check_no_bitmap(struct mddev *mddev)
{
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
- printk(KERN_ERR "%s: bitmaps are not supported for %s\n",
+ pr_warn("%s: bitmaps are not supported for %s\n",
mdname(mddev), mddev->pers->name);
return 1;
}
@@ -956,7 +985,8 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
rdev->sb_start = calc_dev_sboffset(rdev);
ret = read_disk_sb(rdev, MD_SB_BYTES);
- if (ret) return ret;
+ if (ret)
+ return ret;
ret = -EINVAL;
@@ -964,17 +994,15 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
sb = page_address(rdev->sb_page);
if (sb->md_magic != MD_SB_MAGIC) {
- printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
- b);
+ pr_warn("md: invalid raid superblock magic on %s\n", b);
goto abort;
}
if (sb->major_version != 0 ||
sb->minor_version < 90 ||
sb->minor_version > 91) {
- printk(KERN_WARNING "Bad version number %d.%d on %s\n",
- sb->major_version, sb->minor_version,
- b);
+ pr_warn("Bad version number %d.%d on %s\n",
+ sb->major_version, sb->minor_version, b);
goto abort;
}
@@ -982,8 +1010,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
goto abort;
if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
- printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
- b);
+ pr_warn("md: invalid superblock checksum on %s\n", b);
goto abort;
}
@@ -1004,14 +1031,13 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
__u64 ev1, ev2;
mdp_super_t *refsb = page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
- printk(KERN_WARNING "md: %s has different UUID to %s\n",
+ pr_warn("md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
goto abort;
}
if (!sb_equal(refsb, sb)) {
- printk(KERN_WARNING "md: %s has same UUID"
- " but different superblock to %s\n",
- b, bdevname(refdev->bdev, b2));
+ pr_warn("md: %s has same UUID but different superblock to %s\n",
+ b, bdevname(refdev->bdev, b2));
goto abort;
}
ev1 = md_event(sb);
@@ -1158,6 +1184,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
}
if (desc->state & (1<<MD_DISK_WRITEMOSTLY))
set_bit(WriteMostly, &rdev->flags);
+ if (desc->state & (1<<MD_DISK_FAILFAST))
+ set_bit(FailFast, &rdev->flags);
} else /* MULTIPATH are always insync */
set_bit(In_sync, &rdev->flags);
return 0;
@@ -1283,6 +1311,8 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
}
if (test_bit(WriteMostly, &rdev2->flags))
d->state |= (1<<MD_DISK_WRITEMOSTLY);
+ if (test_bit(FailFast, &rdev2->flags))
+ d->state |= (1<<MD_DISK_FAILFAST);
}
/* now set the "removed" and "faulty" bits on any missing devices */
for (i=0 ; i < mddev->raid_disks ; i++) {
@@ -1324,9 +1354,10 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
if (IS_ENABLED(CONFIG_LBDAF) && (u64)num_sectors >= (2ULL << 32) &&
rdev->mddev->level >= 1)
num_sectors = (sector_t)(2ULL << 32) - 2;
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
- md_super_wait(rdev->mddev);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
@@ -1413,13 +1444,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return -EINVAL;
if (calc_sb_1_csum(sb) != sb->sb_csum) {
- printk("md: invalid superblock checksum on %s\n",
+ pr_warn("md: invalid superblock checksum on %s\n",
bdevname(rdev->bdev,b));
return -EINVAL;
}
if (le64_to_cpu(sb->data_size) < 10) {
- printk("md: data_size too small on %s\n",
- bdevname(rdev->bdev,b));
+ pr_warn("md: data_size too small on %s\n",
+ bdevname(rdev->bdev,b));
return -EINVAL;
}
if (sb->pad0 ||
@@ -1503,8 +1534,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
sb->level != refsb->level ||
sb->layout != refsb->layout ||
sb->chunksize != refsb->chunksize) {
- printk(KERN_WARNING "md: %s has strangely different"
- " superblock to %s\n",
+ pr_warn("md: %s has strangely different superblock to %s\n",
bdevname(rdev->bdev,b),
bdevname(refdev->bdev,b2));
return -EINVAL;
@@ -1646,8 +1676,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
case MD_DISK_ROLE_JOURNAL: /* journal device */
if (!(le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)) {
/* journal device without journal feature */
- printk(KERN_WARNING
- "md: journal device provided without journal feature, ignoring the device\n");
+ pr_warn("md: journal device provided without journal feature, ignoring the device\n");
return -EINVAL;
}
set_bit(Journal, &rdev->flags);
@@ -1669,6 +1698,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
}
if (sb->devflags & WriteMostly1)
set_bit(WriteMostly, &rdev->flags);
+ if (sb->devflags & FailFast1)
+ set_bit(FailFast, &rdev->flags);
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
set_bit(Replacement, &rdev->flags);
} else /* MULTIPATH are always insync */
@@ -1707,6 +1738,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
+ if (test_bit(FailFast, &rdev->flags))
+ sb->devflags |= FailFast1;
+ else
+ sb->devflags &= ~FailFast1;
if (test_bit(WriteMostly, &rdev->flags))
sb->devflags |= WriteMostly1;
@@ -1863,9 +1898,10 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->sb_csum = calc_sb_1_csum(sb);
- md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
- rdev->sb_page);
- md_super_wait(rdev->mddev);
+ do {
+ md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
+ rdev->sb_page);
+ } while (md_super_wait(rdev->mddev) < 0);
return num_sectors;
}
@@ -2004,9 +2040,9 @@ int md_integrity_register(struct mddev *mddev)
blk_integrity_register(mddev->gendisk,
bdev_get_integrity(reference->bdev));
- printk(KERN_NOTICE "md: data integrity enabled on %s\n", mdname(mddev));
+ pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
if (bioset_integrity_create(mddev->bio_set, BIO_POOL_SIZE)) {
- printk(KERN_ERR "md: failed to create integrity pool for %s\n",
+ pr_err("md: failed to create integrity pool for %s\n",
mdname(mddev));
return -EINVAL;
}
@@ -2034,8 +2070,8 @@ int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
return 0;
if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) {
- printk(KERN_NOTICE "%s: incompatible integrity profile for %s\n",
- mdname(mddev), bdevname(rdev->bdev, name));
+ pr_err("%s: incompatible integrity profile for %s\n",
+ mdname(mddev), bdevname(rdev->bdev, name));
return -ENXIO;
}
@@ -2089,15 +2125,15 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
rcu_read_unlock();
if (!test_bit(Journal, &rdev->flags) &&
mddev->max_disks && rdev->desc_nr >= mddev->max_disks) {
- printk(KERN_WARNING "md: %s: array is limited to %d devices\n",
- mdname(mddev), mddev->max_disks);
+ pr_warn("md: %s: array is limited to %d devices\n",
+ mdname(mddev), mddev->max_disks);
return -EBUSY;
}
bdevname(rdev->bdev,b);
strreplace(b, '/', '!');
rdev->mddev = mddev;
- printk(KERN_INFO "md: bind<%s>\n", b);
+ pr_debug("md: bind<%s>\n", b);
if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b)))
goto fail;
@@ -2116,8 +2152,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return 0;
fail:
- printk(KERN_WARNING "md: failed to register dev-%s for %s\n",
- b, mdname(mddev));
+ pr_warn("md: failed to register dev-%s for %s\n",
+ b, mdname(mddev));
return err;
}
@@ -2134,7 +2170,7 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
list_del_rcu(&rdev->same_set);
- printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
+ pr_debug("md: unbind<%s>\n", bdevname(rdev->bdev,b));
rdev->mddev = NULL;
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
@@ -2164,8 +2200,7 @@ static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
- printk(KERN_ERR "md: could not open %s.\n",
- __bdevname(dev, b));
+ pr_warn("md: could not open %s.\n", __bdevname(dev, b));
return PTR_ERR(bdev);
}
rdev->bdev = bdev;
@@ -2185,8 +2220,7 @@ static void export_rdev(struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
- printk(KERN_INFO "md: export_rdev(%s)\n",
- bdevname(rdev->bdev,b));
+ pr_debug("md: export_rdev(%s)\n", bdevname(rdev->bdev,b));
md_rdev_clear(rdev);
#ifndef MODULE
if (test_bit(AutoDetected, &rdev->flags))
@@ -2288,24 +2322,24 @@ void md_update_sb(struct mddev *mddev, int force_change)
if (mddev->ro) {
if (force_change)
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
return;
}
repeat:
if (mddev_is_clustered(mddev)) {
- if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
- if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
nospares = 1;
ret = md_cluster_ops->metadata_update_start(mddev);
/* Has someone else has updated the sb */
if (!does_sb_need_changing(mddev)) {
if (ret == 0)
md_cluster_ops->metadata_update_cancel(mddev);
- bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
- BIT(MD_CHANGE_DEVS) |
- BIT(MD_CHANGE_CLEAN));
+ bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
+ BIT(MD_SB_CHANGE_DEVS) |
+ BIT(MD_SB_CHANGE_CLEAN));
return;
}
}
@@ -2321,10 +2355,10 @@ repeat:
}
if (!mddev->persistent) {
- clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
- clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
+ clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (!mddev->external) {
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
rdev_for_each(rdev, mddev) {
if (rdev->badblocks.changed) {
rdev->badblocks.changed = 0;
@@ -2344,9 +2378,9 @@ repeat:
mddev->utime = ktime_get_real_seconds();
- if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags))
force_change = 1;
- if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+ if (test_and_clear_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags))
/* just a clean<-> dirty transition, possibly leave spares alone,
* though if events isn't the right even/odd, we will have to do
* spares after all
@@ -2402,6 +2436,9 @@ repeat:
pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
mdname(mddev), mddev->in_sync);
+ if (mddev->queue)
+ blk_add_trace_msg(mddev->queue, "md md_update_sb");
+rewrite:
bitmap_update_sb(mddev->bitmap);
rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
@@ -2433,15 +2470,16 @@ repeat:
/* only need to write one superblock... */
break;
}
- md_super_wait(mddev);
- /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
+ if (md_super_wait(mddev) < 0)
+ goto rewrite;
+ /* if there was a failure, MD_SB_CHANGE_DEVS was set, and we re-write super */
if (mddev_is_clustered(mddev) && ret == 0)
md_cluster_ops->metadata_update_finish(mddev);
if (mddev->in_sync != sync_req ||
- !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
- BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
+ !bit_clear_unless(&mddev->sb_flags, BIT(MD_SB_CHANGE_PENDING),
+ BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_CLEAN)))
/* have to write it out again */
goto repeat;
wake_up(&mddev->sb_wait);
@@ -2485,7 +2523,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
}
sysfs_notify_dirent_safe(rdev->sysfs_state);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
if (mddev->degraded)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -2523,51 +2561,41 @@ struct rdev_sysfs_entry {
static ssize_t
state_show(struct md_rdev *rdev, char *page)
{
- char *sep = "";
+ char *sep = ",";
size_t len = 0;
unsigned long flags = ACCESS_ONCE(rdev->flags);
if (test_bit(Faulty, &flags) ||
- rdev->badblocks.unacked_exist) {
- len+= sprintf(page+len, "%sfaulty",sep);
- sep = ",";
- }
- if (test_bit(In_sync, &flags)) {
- len += sprintf(page+len, "%sin_sync",sep);
- sep = ",";
- }
- if (test_bit(Journal, &flags)) {
- len += sprintf(page+len, "%sjournal",sep);
- sep = ",";
- }
- if (test_bit(WriteMostly, &flags)) {
- len += sprintf(page+len, "%swrite_mostly",sep);
- sep = ",";
- }
+ (!test_bit(ExternalBbl, &flags) &&
+ rdev->badblocks.unacked_exist))
+ len += sprintf(page+len, "faulty%s", sep);
+ if (test_bit(In_sync, &flags))
+ len += sprintf(page+len, "in_sync%s", sep);
+ if (test_bit(Journal, &flags))
+ len += sprintf(page+len, "journal%s", sep);
+ if (test_bit(WriteMostly, &flags))
+ len += sprintf(page+len, "write_mostly%s", sep);
if (test_bit(Blocked, &flags) ||
(rdev->badblocks.unacked_exist
- && !test_bit(Faulty, &flags))) {
- len += sprintf(page+len, "%sblocked", sep);
- sep = ",";
- }
+ && !test_bit(Faulty, &flags)))
+ len += sprintf(page+len, "blocked%s", sep);
if (!test_bit(Faulty, &flags) &&
!test_bit(Journal, &flags) &&
- !test_bit(In_sync, &flags)) {
- len += sprintf(page+len, "%sspare", sep);
- sep = ",";
- }
- if (test_bit(WriteErrorSeen, &flags)) {
- len += sprintf(page+len, "%swrite_error", sep);
- sep = ",";
- }
- if (test_bit(WantReplacement, &flags)) {
- len += sprintf(page+len, "%swant_replacement", sep);
- sep = ",";
- }
- if (test_bit(Replacement, &flags)) {
- len += sprintf(page+len, "%sreplacement", sep);
- sep = ",";
- }
+ !test_bit(In_sync, &flags))
+ len += sprintf(page+len, "spare%s", sep);
+ if (test_bit(WriteErrorSeen, &flags))
+ len += sprintf(page+len, "write_error%s", sep);
+ if (test_bit(WantReplacement, &flags))
+ len += sprintf(page+len, "want_replacement%s", sep);
+ if (test_bit(Replacement, &flags))
+ len += sprintf(page+len, "replacement%s", sep);
+ if (test_bit(ExternalBbl, &flags))
+ len += sprintf(page+len, "external_bbl%s", sep);
+ if (test_bit(FailFast, &flags))
+ len += sprintf(page+len, "failfast%s", sep);
+
+ if (len)
+ len -= strlen(sep);
return len+sprintf(page+len, "\n");
}
@@ -2587,6 +2615,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
* so that it gets rebuilt based on bitmap
* write_error - sets WriteErrorSeen
* -write_error - clears WriteErrorSeen
+ * {,-}failfast - set/clear FailFast
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
@@ -2610,8 +2639,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
if (err == 0) {
md_kick_rdev_from_array(rdev);
- if (mddev->pers)
- md_update_sb(mddev, 1);
+ if (mddev->pers) {
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
+ md_wakeup_thread(mddev->thread);
+ }
md_new_event(mddev);
}
}
@@ -2626,6 +2657,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
err = 0;
} else if (cmd_match(buf, "-blocked")) {
if (!test_bit(Faulty, &rdev->flags) &&
+ !test_bit(ExternalBbl, &rdev->flags) &&
rdev->badblocks.unacked_exist) {
/* metadata handler doesn't understand badblocks,
* so we need to fail the device
@@ -2642,6 +2674,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
set_bit(In_sync, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "failfast")) {
+ set_bit(FailFast, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-failfast")) {
+ clear_bit(FailFast, &rdev->flags);
+ err = 0;
} else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 &&
!test_bit(Journal, &rdev->flags)) {
if (rdev->mddev->pers == NULL) {
@@ -2708,6 +2746,13 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
}
} else
err = -EBUSY;
+ } else if (cmd_match(buf, "external_bbl") && (rdev->mddev->external)) {
+ set_bit(ExternalBbl, &rdev->flags);
+ rdev->badblocks.shift = 0;
+ err = 0;
+ } else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
+ clear_bit(ExternalBbl, &rdev->flags);
+ err = 0;
}
if (!err)
sysfs_notify_dirent_safe(rdev->sysfs_state);
@@ -3211,10 +3256,8 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
sector_t size;
rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
- if (!rdev) {
- printk(KERN_ERR "md: could not alloc mem for new device!\n");
+ if (!rdev)
return ERR_PTR(-ENOMEM);
- }
err = md_rdev_init(rdev);
if (err)
@@ -3231,8 +3274,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
size = i_size_read(rdev->bdev->bd_inode) >> BLOCK_SIZE_BITS;
if (!size) {
- printk(KERN_WARNING
- "md: %s has zero or unknown size, marking faulty!\n",
+ pr_warn("md: %s has zero or unknown size, marking faulty!\n",
bdevname(rdev->bdev,b));
err = -EINVAL;
goto abort_free;
@@ -3242,16 +3284,13 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
err = super_types[super_format].
load_super(rdev, NULL, super_minor);
if (err == -EINVAL) {
- printk(KERN_WARNING
- "md: %s does not have a valid v%d.%d "
- "superblock, not importing!\n",
+ pr_warn("md: %s does not have a valid v%d.%d superblock, not importing!\n",
bdevname(rdev->bdev,b),
- super_format, super_minor);
+ super_format, super_minor);
goto abort_free;
}
if (err < 0) {
- printk(KERN_WARNING
- "md: could not read %s's sb, not importing!\n",
+ pr_warn("md: could not read %s's sb, not importing!\n",
bdevname(rdev->bdev,b));
goto abort_free;
}
@@ -3287,9 +3326,7 @@ static void analyze_sbs(struct mddev *mddev)
case 0:
break;
default:
- printk( KERN_ERR \
- "md: fatal superblock inconsistency in %s"
- " -- removing from array\n",
+ pr_warn("md: fatal superblock inconsistency in %s -- removing from array\n",
bdevname(rdev->bdev,b));
md_kick_rdev_from_array(rdev);
}
@@ -3302,18 +3339,16 @@ static void analyze_sbs(struct mddev *mddev)
if (mddev->max_disks &&
(rdev->desc_nr >= mddev->max_disks ||
i > mddev->max_disks)) {
- printk(KERN_WARNING
- "md: %s: %s: only %d devices permitted\n",
- mdname(mddev), bdevname(rdev->bdev, b),
- mddev->max_disks);
+ pr_warn("md: %s: %s: only %d devices permitted\n",
+ mdname(mddev), bdevname(rdev->bdev, b),
+ mddev->max_disks);
md_kick_rdev_from_array(rdev);
continue;
}
if (rdev != freshest) {
if (super_types[mddev->major_version].
validate_super(mddev, rdev)) {
- printk(KERN_WARNING "md: kicking non-fresh %s"
- " from array!\n",
+ pr_warn("md: kicking non-fresh %s from array!\n",
bdevname(rdev->bdev,b));
md_kick_rdev_from_array(rdev);
continue;
@@ -3384,7 +3419,7 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
unsigned long msec;
if (mddev_is_clustered(mddev)) {
- pr_info("md: Safemode is disabled for clustered mode\n");
+ pr_warn("md: Safemode is disabled for clustered mode\n");
return -EINVAL;
}
@@ -3472,8 +3507,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
rv = -EINVAL;
if (!mddev->pers->quiesce) {
- printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
- mdname(mddev), mddev->pers->name);
+ pr_warn("md: %s: %s does not support online personality change\n",
+ mdname(mddev), mddev->pers->name);
goto out_unlock;
}
@@ -3491,7 +3526,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
pers = find_pers(level, clevel);
if (!pers || !try_module_get(pers->owner)) {
spin_unlock(&pers_lock);
- printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
+ pr_warn("md: personality %s not loaded\n", clevel);
rv = -EINVAL;
goto out_unlock;
}
@@ -3505,8 +3540,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
}
if (!pers->takeover) {
module_put(pers->owner);
- printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
- mdname(mddev), clevel);
+ pr_warn("md: %s: %s does not support personality takeover\n",
+ mdname(mddev), clevel);
rv = -EINVAL;
goto out_unlock;
}
@@ -3526,8 +3561,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
module_put(pers->owner);
- printk(KERN_WARNING "md: %s: %s would not accept array\n",
- mdname(mddev), clevel);
+ pr_warn("md: %s: %s would not accept array\n",
+ mdname(mddev), clevel);
rv = PTR_ERR(priv);
goto out_unlock;
}
@@ -3570,9 +3605,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
pers->sync_request != NULL) {
/* need to add the md_redundancy_group */
if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
- printk(KERN_WARNING
- "md: cannot register extra attributes for %s\n",
- mdname(mddev));
+ pr_warn("md: cannot register extra attributes for %s\n",
+ mdname(mddev));
mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
}
if (oldpers->sync_request != NULL &&
@@ -3603,9 +3637,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
clear_bit(In_sync, &rdev->flags);
else {
if (sysfs_link_rdev(mddev, rdev))
- printk(KERN_WARNING "md: cannot register rd%d"
- " for %s after level change\n",
- rdev->raid_disk, mdname(mddev));
+ pr_warn("md: cannot register rd%d for %s after level change\n",
+ rdev->raid_disk, mdname(mddev));
}
}
@@ -3618,7 +3651,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
}
blk_set_stacking_limits(&mddev->queue->limits);
pers->run(mddev);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
mddev_resume(mddev);
if (!mddev->thread)
md_update_sb(mddev, 1);
@@ -3813,7 +3846,7 @@ resync_start_store(struct mddev *mddev, const char *buf, size_t len)
if (!err) {
mddev->recovery_cp = n;
if (mddev->pers)
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
}
mddev_unlock(mddev);
return err ?: len;
@@ -3887,7 +3920,7 @@ array_state_show(struct mddev *mddev, char *page)
st = read_auto;
break;
case 0:
- if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
st = write_pending;
else if (mddev->in_sync)
st = clean;
@@ -3925,7 +3958,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
spin_lock(&mddev->lock);
if (st == active) {
restart_array(mddev);
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
+ md_wakeup_thread(mddev->thread);
wake_up(&mddev->sb_wait);
err = 0;
} else /* st == clean */ {
@@ -3935,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
if (mddev->safemode == 1)
mddev->safemode = 0;
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
}
err = 0;
} else
@@ -4001,7 +4035,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
if (mddev->safemode == 1)
mddev->safemode = 0;
- set_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
}
err = 0;
} else
@@ -4015,7 +4049,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
err = restart_array(mddev);
if (err)
break;
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
wake_up(&mddev->sb_wait);
err = 0;
} else {
@@ -5071,13 +5105,13 @@ static int md_alloc(dev_t dev, char *name)
/* This isn't possible, but as kobject_init_and_add is marked
* __must_check, we must do something with the result
*/
- printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
- disk->disk_name);
+ pr_debug("md: cannot register %s/md - name in use\n",
+ disk->disk_name);
error = 0;
}
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_bitmap_group))
- printk(KERN_DEBUG "pointless warning\n");
+ pr_debug("pointless warning\n");
mutex_unlock(&mddev->open_mutex);
abort:
mutex_unlock(&disks_mutex);
@@ -5179,15 +5213,15 @@ int md_run(struct mddev *mddev)
if (mddev->dev_sectors &&
rdev->data_offset + mddev->dev_sectors
> rdev->sb_start) {
- printk("md: %s: data overlaps metadata\n",
- mdname(mddev));
+ pr_warn("md: %s: data overlaps metadata\n",
+ mdname(mddev));
return -EINVAL;
}
} else {
if (rdev->sb_start + rdev->sb_size/512
> rdev->data_offset) {
- printk("md: %s: metadata overlaps data\n",
- mdname(mddev));
+ pr_warn("md: %s: metadata overlaps data\n",
+ mdname(mddev));
return -EINVAL;
}
}
@@ -5202,11 +5236,11 @@ int md_run(struct mddev *mddev)
if (!pers || !try_module_get(pers->owner)) {
spin_unlock(&pers_lock);
if (mddev->level != LEVEL_NONE)
- printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
- mddev->level);
+ pr_warn("md: personality for level %d is not loaded!\n",
+ mddev->level);
else
- printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
- mddev->clevel);
+ pr_warn("md: personality for level %s is not loaded!\n",
+ mddev->clevel);
return -EINVAL;
}
spin_unlock(&pers_lock);
@@ -5236,21 +5270,16 @@ int md_run(struct mddev *mddev)
if (rdev < rdev2 &&
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
- printk(KERN_WARNING
- "%s: WARNING: %s appears to be"
- " on the same physical disk as"
- " %s.\n",
- mdname(mddev),
- bdevname(rdev->bdev,b),
- bdevname(rdev2->bdev,b2));
+ pr_warn("%s: WARNING: %s appears to be on the same physical disk as %s.\n",
+ mdname(mddev),
+ bdevname(rdev->bdev,b),
+ bdevname(rdev2->bdev,b2));
warned = 1;
}
}
if (warned)
- printk(KERN_WARNING
- "True protection against single-disk"
- " failure might be compromised.\n");
+ pr_warn("True protection against single-disk failure might be compromised.\n");
}
mddev->recovery = 0;
@@ -5264,14 +5293,14 @@ int md_run(struct mddev *mddev)
err = pers->run(mddev);
if (err)
- printk(KERN_ERR "md: pers->run() failed ...\n");
+ pr_warn("md: pers->run() failed ...\n");
else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
- WARN_ONCE(!mddev->external_size, "%s: default size too small,"
- " but 'external_size' not in effect?\n", __func__);
- printk(KERN_ERR
- "md: invalid array_size %llu > default size %llu\n",
- (unsigned long long)mddev->array_sectors / 2,
- (unsigned long long)pers->size(mddev, 0, 0) / 2);
+ WARN_ONCE(!mddev->external_size,
+ "%s: default size too small, but 'external_size' not in effect?\n",
+ __func__);
+ pr_warn("md: invalid array_size %llu > default size %llu\n",
+ (unsigned long long)mddev->array_sectors / 2,
+ (unsigned long long)pers->size(mddev, 0, 0) / 2);
err = -EINVAL;
}
if (err == 0 && pers->sync_request &&
@@ -5281,8 +5310,8 @@ int md_run(struct mddev *mddev)
bitmap = bitmap_create(mddev, -1);
if (IS_ERR(bitmap)) {
err = PTR_ERR(bitmap);
- printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
- mdname(mddev), err);
+ pr_warn("%s: failed to create bitmap (%d)\n",
+ mdname(mddev), err);
} else
mddev->bitmap = bitmap;
@@ -5318,9 +5347,8 @@ int md_run(struct mddev *mddev)
if (pers->sync_request) {
if (mddev->kobj.sd &&
sysfs_create_group(&mddev->kobj, &md_redundancy_group))
- printk(KERN_WARNING
- "md: cannot register extra attributes for %s\n",
- mdname(mddev));
+ pr_warn("md: cannot register extra attributes for %s\n",
+ mdname(mddev));
mddev->sysfs_action = sysfs_get_dirent_safe(mddev->kobj.sd, "sync_action");
} else if (mddev->ro == 2) /* auto-readonly not meaningful */
mddev->ro = 0;
@@ -5350,7 +5378,7 @@ int md_run(struct mddev *mddev)
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- if (mddev->flags & MD_UPDATE_SB_FLAGS)
+ if (mddev->sb_flags)
md_update_sb(mddev, 0);
md_new_event(mddev);
@@ -5421,8 +5449,7 @@ static int restart_array(struct mddev *mddev)
mddev->safemode = 0;
mddev->ro = 0;
set_disk_ro(disk, 0);
- printk(KERN_INFO "md: %s switched to read-write mode.\n",
- mdname(mddev));
+ pr_debug("md: %s switched to read-write mode.\n", mdname(mddev));
/* Kick recovery or resync if necessary */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
@@ -5446,6 +5473,7 @@ static void md_clean(struct mddev *mddev)
mddev->level = LEVEL_NONE;
mddev->clevel[0] = 0;
mddev->flags = 0;
+ mddev->sb_flags = 0;
mddev->ro = 0;
mddev->metadata_type[0] = 0;
mddev->chunk_sectors = 0;
@@ -5490,12 +5518,15 @@ static void __md_stop_writes(struct mddev *mddev)
del_timer_sync(&mddev->safemode_timer);
+ if (mddev->pers && mddev->pers->quiesce) {
+ mddev->pers->quiesce(mddev, 1);
+ mddev->pers->quiesce(mddev, 0);
+ }
bitmap_flush(mddev);
- md_super_wait(mddev);
if (mddev->ro == 0 &&
((!mddev->in_sync && !mddev_is_clustered(mddev)) ||
- (mddev->flags & MD_UPDATE_SB_FLAGS))) {
+ mddev->sb_flags)) {
/* mark array as shutdown cleanly */
if (!mddev_is_clustered(mddev))