summaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-03-22 12:29:50 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-03-22 12:29:50 -0700
commit267d7b23dd62f6ec55e0fba777e456495c308fc7 (patch)
tree5c9fe0f07d5b87029b9c07eb003596c05d161a8f /drivers/md/raid1.c
parent28f23d1f3b6a6078312b6e9585e583cc7326fe22 (diff)
parentecb178bb2b154a40cfae9fa4c42e62ccfa81ac6b (diff)
Merge tag 'md-3.4' of git://neil.brown.name/md
Pull md updates for 3.4 from Neil Brown: "Mostly tidying up code in preparation for some bigger changes next time. A few bug fixes tagged for -stable. Main functionality change is that some RAID10 arrays can now grow to use extra space that may have been made available on the individual devices." Fixed up trivial conflicts with the k[un]map_atomic() cleanups in drivers/md/bitmap.c. * tag 'md-3.4' of git://neil.brown.name/md: (22 commits) md: Add judgement bb->unacked_exist in function md_ack_all_badblocks(). md: fix clearing of the 'changed' flags for the bad blocks list. md/bitmap: discard CHUNK_BLOCK_SHIFT macro md/bitmap: remove unnecessary indirection when allocating. md/bitmap: remove some pointless locking. md/bitmap: change a 'goto' to a normal 'if' construct. md/bitmap: move printing of bitmap status to bitmap.c md/bitmap: remove some unused noise from bitmap.h md/raid10 - support resizing some RAID10 arrays. md/raid1: handle merge_bvec_fn in member devices. md/raid10: handle merge_bvec_fn in member devices. md: add proper merge_bvec handling to RAID0 and Linear. md: tidy up rdev_for_each usage. md/raid1,raid10: avoid deadlock during resync/recovery. md/bitmap: ensure to load bitmap when creating via sysfs. md: don't set md arrays to readonly on shutdown. md: allow re-add to failed arrays. md/raid5: use atomic_dec_return() instead of atomic_dec() and atomic_read(). md: Use existed macros instead of numbers md/raid5: removed unused 'added_devices' variable. ...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c98
1 files changed, 73 insertions, 25 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a0b225eb4ac4..4a40a200d769 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -523,6 +523,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
|| rdev == NULL
+ || test_bit(Unmerged, &rdev->flags)
|| test_bit(Faulty, &rdev->flags))
continue;
if (!test_bit(In_sync, &rdev->flags) &&
@@ -614,6 +615,39 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
return best_disk;
}
+static int raid1_mergeable_bvec(struct request_queue *q,
+ struct bvec_merge_data *bvm,
+ struct bio_vec *biovec)
+{
+ struct mddev *mddev = q->queuedata;
+ struct r1conf *conf = mddev->private;
+ sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
+ int max = biovec->bv_len;
+
+ if (mddev->merge_check_needed) {
+ int disk;
+ rcu_read_lock();
+ for (disk = 0; disk < conf->raid_disks * 2; disk++) {
+ struct md_rdev *rdev = rcu_dereference(
+ conf->mirrors[disk].rdev);
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
+ struct request_queue *q =
+ bdev_get_queue(rdev->bdev);
+ if (q->merge_bvec_fn) {
+ bvm->bi_sector = sector +
+ rdev->data_offset;
+ bvm->bi_bdev = rdev->bdev;
+ max = min(max, q->merge_bvec_fn(
+ q, bvm, biovec));
+ }
+ }
+ }
+ rcu_read_unlock();
+ }
+ return max;
+
+}
+
int md_raid1_congested(struct mddev *mddev, int bits)
{
struct r1conf *conf = mddev->private;
@@ -737,9 +771,22 @@ static void wait_barrier(struct r1conf *conf)
spin_lock_irq(&conf->resync_lock);
if (conf->barrier) {
conf->nr_waiting++;
- wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+ /* Wait for the barrier to drop.
+ * However if there are already pending
+ * requests (preventing the barrier from
+ * rising completely), and the
+ * pre-process bio queue isn't empty,
+ * then don't wait, as we need to empty
+ * that queue to get the nr_pending
+ * count down.
+ */
+ wait_event_lock_irq(conf->wait_barrier,
+ !conf->barrier ||
+ (conf->nr_pending &&
+ current->bio_list &&
+ !bio_list_empty(current->bio_list)),
conf->resync_lock,
- );
+ );
conf->nr_waiting--;
}
conf->nr_pending++;
@@ -1002,7 +1049,8 @@ read_again:
break;
}
r1_bio->bios[i] = NULL;
- if (!rdev || test_bit(Faulty, &rdev->flags)) {
+ if (!rdev || test_bit(Faulty, &rdev->flags)
+ || test_bit(Unmerged, &rdev->flags)) {
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
continue;
@@ -1322,6 +1370,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
struct mirror_info *p;
int first = 0;
int last = conf->raid_disks - 1;
+ struct request_queue *q = bdev_get_queue(rdev->bdev);
if (mddev->recovery_disabled == conf->recovery_disabled)
return -EBUSY;
@@ -1329,23 +1378,17 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
+ if (q->merge_bvec_fn) {
+ set_bit(Unmerged, &rdev->flags);
+ mddev->merge_check_needed = 1;
+ }
+
for (mirror = first; mirror <= last; mirror++) {
p = conf->mirrors+mirror;
if (!p->rdev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- /* as we don't honour merge_bvec_fn, we must
- * never risk violating it, so limit
- * ->max_segments to one lying with a single
- * page, as a one page request is never in
- * violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
- blk_queue_max_segments(mddev->queue, 1);
- blk_queue_segment_boundary(mddev->queue,
- PAGE_CACHE_SIZE - 1);
- }
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1370,6 +1413,19 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
+ if (err == 0 && test_bit(Unmerged, &rdev->flags)) {
+ /* Some requests might not have seen this new
+ * merge_bvec_fn. We must wait for them to complete
+ * before merging the device fully.
+ * First we make sure any code which has tested
+ * our function has submitted the request, then
+ * we wait for all outstanding requests to complete.
+ */
+ synchronize_sched();
+ raise_barrier(conf);
+ lower_barrier(conf);
+ clear_bit(Unmerged, &rdev->flags);
+ }
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
@@ -2491,7 +2547,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
err = -EINVAL;
spin_lock_init(&conf->device_lock);
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
int disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks
|| disk_idx < 0)
@@ -2609,20 +2665,11 @@ static int run(struct mddev *mddev)
if (IS_ERR(conf))
return PTR_ERR(conf);
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
if (!mddev->gendisk)
continue;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
- /* as we don't honour merge_bvec_fn, we must never risk
- * violating it, so limit ->max_segments to 1 lying within
- * a single page, as a one page request is never in violation.
- */
- if (rdev->bdev->bd_disk->queue->merge_bvec_fn) {
- blk_queue_max_segments(mddev->queue, 1);
- blk_queue_segment_boundary(mddev->queue,
- PAGE_CACHE_SIZE - 1);
- }
}
mddev->degraded = 0;
@@ -2656,6 +2703,7 @@ static int run(struct mddev *mddev)
if (mddev->queue) {
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
+ blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
}
return md_integrity_register(mddev);
}