summary | refs | log | tree | commit | diff | stats
path: root/drivers/md/raid5-cache.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-18 12:04:41 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2017-05-18 12:04:41 -0700
commit: 8b4822de59d5d9919b9b045183a36c673ce20b73 (patch)
tree: e7caba608a475448022a2ee0b509a157f7caae8b /drivers/md/raid5-cache.c
parent: 667f867c93d0117dec83bc5be9018d1a3a94044d (diff)
parent: d82dd0e34d0347be201fd274dc84cd645dccc064 (diff)
Merge tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD fixes from Shaohua Li:
 - Several bug fixes for raid5-cache from Song Liu, mainly handle journal disk error
 - Fix bad block handling in choosing raid1 disk from Tomasz Majchrzak
 - Simplify external metadata array sysfs handling from Artur Paszkiewicz
 - Optimize raid0 discard handling from me, now raid0 will dispatch large discard IO directly to underlayer disks.

* tag 'md/4.12-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid1: prefer disk without bad blocks
  md/r5cache: handle sync with data in write back cache
  md/r5cache: gracefully handle journal device errors for writeback mode
  md/raid1/10: avoid unnecessary locking
  md/raid5-cache: in r5l_do_submit_io(), submit io->split_bio first
  md/md0: optimize raid0 discard handling
  md: don't return -EAGAIN in md_allow_write for external metadata arrays
  md/raid5: make use of spin_lock_irq over local_irq_disable + spin_lock
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r-- drivers/md/raid5-cache.c 47
1 files changed, 35 insertions, 12 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 26ba09282e7c..4c00bc248287 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -24,6 +24,7 @@
#include "md.h"
#include "raid5.h"
#include "bitmap.h"
+#include "raid5-log.h"
/*
* metadata/data stored in disk with 4k size unit (a block) regardless
@@ -622,20 +623,30 @@ static void r5l_do_submit_io(struct r5l_log *log, struct r5l_io_unit *io)
__r5l_set_io_unit_state(io, IO_UNIT_IO_START);
spin_unlock_irqrestore(&log->io_list_lock, flags);
+ /*
+ * In case of journal device failures, submit_bio will get an error
+ * and call endio, then active stripes will continue the write
+ * process. Therefore, it is not necessary to check Faulty bit
+ * of journal device here.
+ *
+ * We can't check split_bio after current_bio is submitted. If
+ * io->split_bio is null, after current_bio is submitted, current_bio
+ * might already be completed and the io_unit is freed. We submit
+ * split_bio first to avoid the issue.
+ */
+ if (io->split_bio) {
+ if (io->has_flush)
+ io->split_bio->bi_opf |= REQ_PREFLUSH;
+ if (io->has_fua)
+ io->split_bio->bi_opf |= REQ_FUA;
+ submit_bio(io->split_bio);
+ }
+
if (io->has_flush)
io->current_bio->bi_opf |= REQ_PREFLUSH;
if (io->has_fua)
io->current_bio->bi_opf |= REQ_FUA;
submit_bio(io->current_bio);
-
- if (!io->split_bio)
- return;
-
- if (io->has_flush)
- io->split_bio->bi_opf |= REQ_PREFLUSH;
- if (io->has_fua)
- io->split_bio->bi_opf |= REQ_FUA;
- submit_bio(io->split_bio);
}
/* deferred io_unit will be dispatched here */
@@ -670,6 +681,11 @@ static void r5c_disable_writeback_async(struct work_struct *work)
return;
pr_info("md/raid:%s: Disabling writeback cache for degraded array.\n",
mdname(mddev));
+
+ /* wait superblock change before suspend */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+
mddev_suspend(mddev);
log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
mddev_resume(mddev);
@@ -2621,8 +2637,11 @@ int r5c_try_caching_write(struct r5conf *conf,
* When run in degraded mode, array is set to write-through mode.
* This check helps drain pending write safely in the transition to
* write-through mode.
+ *
+ * When a stripe is syncing, the write is also handled in write
+ * through mode.
*/
- if (s->failed) {
+ if (s->failed || test_bit(STRIPE_SYNCING, &sh->state)) {
r5c_make_stripe_write_out(sh);
return -EAGAIN;
}
@@ -2825,6 +2844,9 @@ void r5c_finish_stripe_write_out(struct r5conf *conf,
}
r5l_append_flush_payload(log, sh->sector);
+ /* stripe is flushed to raid disks, we can do resync now */
+ if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
+ set_bit(STRIPE_HANDLE, &sh->state);
}
int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
@@ -2973,7 +2995,7 @@ ioerr:
return ret;
}
-void r5c_update_on_rdev_error(struct mddev *mddev)
+void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
struct r5l_log *log = conf->log;
@@ -2981,7 +3003,8 @@ void r5c_update_on_rdev_error(struct mddev *mddev)
if (!log)
return;
- if (raid5_calc_degraded(conf) > 0 &&
+ if ((raid5_calc_degraded(conf) > 0 ||
+ test_bit(Journal, &rdev->flags)) &&
conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK)
schedule_work(&log->disable_writeback_work);
}