summaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c49
1 files changed, 45 insertions, 4 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 13d76767c2cf..dc83da69ca7c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2897,6 +2897,30 @@ sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous)
return r_sector;
}
+/*
+ * There are cases where we want handle_stripe_dirtying() and
+ * schedule_reconstruction() to delay towrite to some dev of a stripe.
+ *
+ * This function checks whether we want to delay the towrite. Specifically,
+ * we delay the towrite when:
+ *
+ * 1. degraded stripe has a non-overwrite to the missing dev, AND this
+ * stripe has data in journal (for other devices).
+ *
+ * In this case, when reading data for the non-overwrite dev, it is
+ * necessary to handle complex rmw of write back cache (prexor with
+ * orig_page, and xor with page). To keep read path simple, we would
+ * like to flush data in journal to RAID disks first, so complex rmw
+ * is handled in the write patch (handle_stripe_dirtying).
+ *
+ */
+static inline bool delay_towrite(struct r5dev *dev,
+ struct stripe_head_state *s)
+{
+ return !test_bit(R5_OVERWRITE, &dev->flags) &&
+ !test_bit(R5_Insync, &dev->flags) && s->injournal;
+}
+
static void
schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
int rcw, int expand)
@@ -2917,7 +2941,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if (dev->towrite) {
+ if (dev->towrite && !delay_towrite(dev, s)) {
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantdrain, &dev->flags);
if (!expand)
@@ -3494,10 +3518,26 @@ static void handle_stripe_fill(struct stripe_head *sh,
* midst of changing due to a write
*/
if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state &&
- !sh->reconstruct_state)
+ !sh->reconstruct_state) {
+
+ /*
+ * For degraded stripe with data in journal, do not handle
+ * read requests yet, instead, flush the stripe to raid
+ * disks first, this avoids handling complex rmw of write
+ * back cache (prexor with orig_page, and then xor with
+ * page) in the read path
+ */
+ if (s->injournal && s->failed) {
+ if (test_bit(STRIPE_R5C_CACHING, &sh->state))
+ r5c_make_stripe_write_out(sh);
+ goto out;
+ }
+
for (i = disks; i--; )
if (fetch_block(sh, s, i, disks))
break;
+ }
+out:
set_bit(STRIPE_HANDLE, &sh->state);
}
@@ -3653,7 +3693,8 @@ static int handle_stripe_dirtying(struct r5conf *conf,
} else for (i = disks; i--; ) {
/* would I have to read this buffer for read_modify_write */
struct r5dev *dev = &sh->dev[i];
- if ((dev->towrite || i == sh->pd_idx || i == sh->qd_idx ||
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
+ i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) &&
!(uptodate_for_rmw(dev) ||
@@ -3717,7 +3758,7 @@ static int handle_stripe_dirtying(struct r5conf *conf,
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
- if ((dev->towrite ||
+ if (((dev->towrite && !delay_towrite(dev, s)) ||
i == sh->pd_idx || i == sh->qd_idx ||
test_bit(R5_InJournal, &dev->flags)) &&
!test_bit(R5_LOCKED, &dev->flags) &&