summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2016-06-14 00:26:17 +0200
committerJens Axboe <axboe@fb.com>2016-06-13 21:43:04 -0600
commit1b228c98ceae9e6cbdf874360ff3c325fa6ac1ad (patch)
treeb772cd99365e000e2472f7f72684473dd1e2fda5
parentbca1cbaeac580f39d8777df35e1245ffe59d8079 (diff)
drbd: fix regression: protocol A sometimes synchronous, C sometimes double-latency
Regression introduced with 8.4.5 drbd: application writes may set-in-sync in protocol != C Overwriting the same block (LBA) while a former version is still "in-flight" to the peer (to be exact: we did not receive the P_BARRIER_ACK for its epoch yet) would wait for the full epoch of that former version to be acknowledged by the peer. In synchronous and quasi-synchronous protocols C and B, this may double the latency on overwrites. With protocol A, which is supposed to be asynchronous and only wait for local completion, it is even worse: it would make overwrites quasi-synchronous, they would be hit by the full RTT, which protocol A was specifically meant to avoid, and possibly the additional time it takes to drain the buffers first. Particularly bad for databases, or anything else that does frequent updates to the same blocks (various file system meta data). No impact if >= rtt passes between updates to the same block. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--drivers/block/drbd/drbd_req.c18
1 files changed, 11 insertions, 7 deletions
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index eef6e9575b4e..74903ab85ae9 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -977,16 +977,20 @@ static void complete_conflicting_writes(struct drbd_request *req)
sector_t sector = req->i.sector;
int size = req->i.size;
- i = drbd_find_overlap(&device->write_requests, sector, size);
- if (!i)
- return;
-
for (;;) {
- prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
- i = drbd_find_overlap(&device->write_requests, sector, size);
- if (!i)
+ drbd_for_each_overlap(i, &device->write_requests, sector, size) {
+ /* Ignore, if already completed to upper layers. */
+ if (i->completed)
+ continue;
+ /* Handle the first found overlap. After the schedule
+ * we have to restart the tree walk. */
break;
+ }
+ if (!i) /* if any */
+ break;
+
/* Indicate to wake up device->misc_wait on progress. */
+ prepare_to_wait(&device->misc_wait, &wait, TASK_UNINTERRUPTIBLE);
i->waiting = true;
spin_unlock_irq(&device->resource->req_lock);
schedule();