summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-04-09 11:50:29 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-04-09 11:50:29 -0700
commit2f4084209adc77f9a1c9f38db3019a509e167882 (patch)
tree775657114c885505ecc46605e29ea1470e986f76
parent2f10ffcfb28beb35137d9e86992c771b4a6c5f2a (diff)
parent3440c49f5c5ecb4f29b0544aa87da71888404f8f (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (34 commits) cfq-iosched: Fix the incorrect timeslice accounting with forced_dispatch loop: Update mtime when writing using aops block: expose the statistics in blkio.time and blkio.sectors for the root cgroup backing-dev: Handle class_create() failure Block: Fix block/elevator.c elevator_get() off-by-one error drbd: lc_element_by_index() never returns NULL cciss: unlock on error path cfq-iosched: Do not merge queues of BE and IDLE classes cfq-iosched: Add additional blktrace log messages in CFQ for easier debugging i2o: Remove the dangerous kobj_to_i2o_device macro block: remove 16 bytes of padding from struct request on 64bits cfq-iosched: fix a kbuild regression block: make CONFIG_BLK_CGROUP visible Remove GENHD_FL_DRIVERFS block: Export max number of segments and max segment size in sysfs block: Finalize conversion of block limits functions block: Fix overrun in lcm() and move it to lib vfs: improve writeback_inodes_wb() paride: fix off-by-one test drbd: fix al-to-on-disk-bitmap for 4k logical_block_size ...
-rw-r--r--Documentation/DocBook/tracepoint.tmpl13
-rw-r--r--Documentation/block/biodoc.txt4
-rw-r--r--block/Kconfig3
-rw-r--r--block/blk-settings.c11
-rw-r--r--block/blk-sysfs.c25
-rw-r--r--block/cfq-iosched.c41
-rw-r--r--block/elevator.c2
-rw-r--r--drivers/block/DAC960.c1
-rw-r--r--drivers/block/drbd/drbd_actlog.c19
-rw-r--r--drivers/block/drbd/drbd_bitmap.c10
-rw-r--r--drivers/block/drbd/drbd_int.h12
-rw-r--r--drivers/block/drbd/drbd_main.c20
-rw-r--r--drivers/block/drbd/drbd_nl.c44
-rw-r--r--drivers/block/drbd/drbd_receiver.c34
-rw-r--r--drivers/block/drbd/drbd_worker.c18
-rw-r--r--drivers/block/loop.c2
-rw-r--r--drivers/block/paride/pcd.c4
-rw-r--r--drivers/block/paride/pf.c4
-rw-r--r--drivers/block/paride/pt.c4
-rw-r--r--drivers/block/virtio_blk.c5
-rw-r--r--drivers/scsi/sd.c2
-rw-r--r--fs/bio.c4
-rw-r--r--fs/fs-writeback.c133
-rw-r--r--include/linux/blkdev.h35
-rw-r--r--include/linux/drbd.h2
-rw-r--r--include/linux/drbd_nl.h3
-rw-r--r--include/linux/genhd.h2
-rw-r--r--include/linux/i2o.h1
-rw-r--r--include/linux/lcm.h8
-rw-r--r--include/linux/writeback.h3
-rw-r--r--include/trace/events/block.h164
-rw-r--r--lib/Makefile2
-rw-r--r--lib/lcm.c15
-rw-r--r--mm/backing-dev.c3
34 files changed, 475 insertions, 178 deletions
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
index 8bca1d5cec09..e8473eae2a20 100644
--- a/Documentation/DocBook/tracepoint.tmpl
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -16,6 +16,15 @@
</address>
</affiliation>
</author>
+ <author>
+ <firstname>William</firstname>
+ <surname>Cohen</surname>
+ <affiliation>
+ <address>
+ <email>wcohen@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
</authorgroup>
<legalnotice>
@@ -91,4 +100,8 @@
!Iinclude/trace/events/signal.h
</chapter>
+ <chapter id="block">
+ <title>Block IO</title>
+!Iinclude/trace/events/block.h
+ </chapter>
</book>
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6fab97ea7e6b..508b5b2b0289 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -1162,8 +1162,8 @@ where a driver received a request ala this before:
As mentioned, there is no virtual mapping of a bio. For DMA, this is
not a problem as the driver probably never will need a virtual mapping.
-Instead it needs a bus mapping (pci_map_page for a single segment or
-use blk_rq_map_sg for scatter gather) to be able to ship it to the driver. For
+Instead it needs a bus mapping (dma_map_page for a single segment or
+use dma_map_sg for scatter gather) to be able to ship it to the driver. For
PIO drivers (or drivers that need to revert to PIO transfer once in a
while (IDE for example)), where the CPU is doing the actual data
transfer a virtual mapping is needed. If the driver supports highmem I/O,
diff --git a/block/Kconfig b/block/Kconfig
index 62a5921321cd..f9e89f4d94bb 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -78,8 +78,9 @@ config BLK_DEV_INTEGRITY
Protection. If in doubt, say N.
config BLK_CGROUP
- tristate
+ tristate "Block cgroup support"
depends on CGROUPS
+ depends on CFQ_GROUP_IOSCHED
default n
---help---
Generic block IO controller cgroup interface. This is the common
diff --git a/block/blk-settings.c b/block/blk-settings.c
index d9a9db5f0a2b..f5ed5a1187ba 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -8,6 +8,7 @@
#include <linux/blkdev.h>
#include <linux/bootmem.h> /* for max_pfn/max_low_pfn */
#include <linux/gcd.h>
+#include <linux/lcm.h>
#include <linux/jiffies.h>
#include <linux/gfp.h>
@@ -462,16 +463,6 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
}
EXPORT_SYMBOL(blk_queue_stack_limits);
-static unsigned int lcm(unsigned int a, unsigned int b)
-{
- if (a && b)
- return (a * b) / gcd(a, b);
- else if (b)
- return b;
-
- return a;
-}
-
/**
* blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top device)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index c2b821fa324a..306759bbdf1b 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -107,6 +107,19 @@ static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
return queue_var_show(max_sectors_kb, (page));
}
+static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(queue_max_segments(q), (page));
+}
+
+static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
+{
+ if (test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags))
+ return queue_var_show(queue_max_segment_size(q), (page));
+
+ return queue_var_show(PAGE_CACHE_SIZE, (page));
+}
+
static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
{
return queue_var_show(queue_logical_block_size(q), page);
@@ -281,6 +294,16 @@ static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
.show = queue_max_hw_sectors_show,
};
+static struct queue_sysfs_entry queue_max_segments_entry = {
+ .attr = {.name = "max_segments", .mode = S_IRUGO },
+ .show = queue_max_segments_show,
+};
+
+static struct queue_sysfs_entry queue_max_segment_size_entry = {
+ .attr = {.name = "max_segment_size", .mode = S_IRUGO },
+ .show = queue_max_segment_size_show,
+};
+
static struct queue_sysfs_entry queue_iosched_entry = {
.attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
.show = elv_iosched_show,
@@ -356,6 +379,8 @@ static struct attribute *default_attrs[] = {
&queue_ra_entry.attr,
&queue_max_hw_sectors_entry.attr,
&queue_max_sectors_entry.attr,
+ &queue_max_segments_entry.attr,
+ &queue_max_segment_size_entry.attr,
&queue_iosched_entry.attr,
&queue_hw_sector_size_entry.attr,
&queue_logical_block_size_entry.attr,
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fc98a48554fd..838834be115b 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -48,6 +48,7 @@ static const int cfq_hist_divisor = 4;
#define CFQ_SERVICE_SHIFT 12
#define CFQQ_SEEK_THR (sector_t)(8 * 100)
+#define CFQQ_CLOSE_THR (sector_t)(8 * 1024)
#define CFQQ_SECT_THR_NONROT (sector_t)(2 * 32)
#define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8)
@@ -948,6 +949,11 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
unsigned int major, minor;
cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+ if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+ sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+ cfqg->blkg.dev = MKDEV(major, minor);
+ goto done;
+ }
if (cfqg || !create)
goto done;
@@ -1518,7 +1524,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
struct cfq_queue *cfqq)
{
if (cfqq) {
- cfq_log_cfqq(cfqd, cfqq, "set_active");
+ cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
+ cfqd->serving_prio, cfqd->serving_type);
cfqq->slice_start = 0;
cfqq->dispatch_start = jiffies;
cfqq->allocated_slice = 0;
@@ -1661,9 +1668,9 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd,
}
static inline int cfq_rq_close(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- struct request *rq, bool for_preempt)
+ struct request *rq)
{
- return cfq_dist_from_last(cfqd, rq) <= CFQQ_SEEK_THR;
+ return cfq_dist_from_last(cfqd, rq) <= CFQQ_CLOSE_THR;
}
static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
@@ -1690,7 +1697,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
* will contain the closest sector.
*/
__cfqq = rb_entry(parent, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq;
if (blk_rq_pos(__cfqq->next_rq) < sector)
@@ -1701,7 +1708,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
return NULL;
__cfqq = rb_entry(node, struct cfq_queue, p_node);
- if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq, false))
+ if (cfq_rq_close(cfqd, cur_cfqq, __cfqq->next_rq))
return __cfqq;
return NULL;
@@ -1722,6 +1729,8 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
{
struct cfq_queue *cfqq;
+ if (cfq_class_idle(cur_cfqq))
+ return NULL;
if (!cfq_cfqq_sync(cur_cfqq))
return NULL;
if (CFQQ_SEEKY(cur_cfqq))
@@ -1788,7 +1797,11 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* Otherwise, we do only if they are the last ones
* in their service tree.
*/
- return service_tree->count == 1 && cfq_cfqq_sync(cfqq);
+ if (service_tree->count == 1 && cfq_cfqq_sync(cfqq))
+ return 1;
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d",
+ service_tree->count);
+ return 0;
}
static void cfq_arm_slice_timer(struct cfq_data *cfqd)
@@ -1833,8 +1846,11 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
* time slice.
*/
if (sample_valid(cic->ttime_samples) &&
- (cfqq->slice_end - jiffies < cic->ttime_mean))
+ (cfqq->slice_end - jiffies < cic->ttime_mean)) {
+ cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%d",
+ cic->ttime_mean);
return;
+ }
cfq_mark_cfqq_wait_request(cfqq);
@@ -2042,6 +2058,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
slice = max(slice, 2 * cfqd->cfq_slice_idle);
slice = max_t(unsigned, slice, CFQ_MIN_TT);
+ cfq_log(cfqd, "workload slice:%d", slice);
cfqd->workload_expires = jiffies + slice;
cfqd->noidle_tree_requires_idle = false;
}
@@ -2189,10 +2206,13 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
struct cfq_queue *cfqq;
int dispatched = 0;
- while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL)
+ /* Expire the timeslice of the current active queue first */
+ cfq_slice_expired(cfqd, 0);
+ while ((cfqq = cfq_get_next_queue_forced(cfqd)) != NULL) {
+ __cfq_set_active_queue(cfqd, cfqq);
dispatched += __cfq_forced_dispatch_cfqq(cfqq);
+ }
- cfq_slice_expired(cfqd, 0);
BUG_ON(cfqd->busy_queues);
cfq_log(cfqd, "forced_dispatch=%d", dispatched);
@@ -3104,7 +3124,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
* if this request is as-good as one we would expect from the
* current cfqq, let it preempt
*/
- if (cfq_rq_close(cfqd, cfqq, rq, true))
+ if (cfq_rq_close(cfqd, cfqq, rq))
return true;
return false;
@@ -3308,6 +3328,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (cfq_should_wait_busy(cfqd, cfqq)) {
cfqq->slice_end = jiffies + cfqd->cfq_slice_idle;
cfq_mark_cfqq_wait_busy(cfqq);
+ cfq_log_cfqq(cfqd, cfqq, "will busy wait");
}
/*
diff --git a/block/elevator.c b/block/elevator.c
index df75676f6671..76e3702d5381 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -154,7 +154,7 @@ static struct elevator_type *elevator_get(const char *name)
spin_unlock(&elv_list_lock);
- sprintf(elv, "%s-iosched", name);
+ snprintf(elv, sizeof(elv), "%s-iosched", name);
request_module("%s", elv);
spin_lock(&elv_list_lock);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 459f1bc25a7b..c5f22bb0a48e 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -2533,7 +2533,6 @@ static bool DAC960_RegisterBlockDevice(DAC960_Controller_T *Controller)
Controller->RequestQueue[n] = RequestQueue;
blk_queue_bounce_limit(RequestQueue, Controller->BounceBufferLimit);
RequestQueue->queuedata = Controller;
- blk_queue_max_hw_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_segments(RequestQueue, Controller->DriverScatterGatherLimit);
blk_queue_max_hw_sectors(RequestQueue, Controller->MaxBlocksPerCommand);
disk->queue = RequestQueue;
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 17956ff6a08d..df018990c422 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -536,7 +536,9 @@ static void atodb_endio(struct bio *bio, int error)
put_ldev(mdev);
}
+/* sector to word */
#define S2W(s) ((s)<<(BM_EXT_SHIFT-BM_BLOCK_SHIFT-LN2_BPL))
+
/* activity log to on disk bitmap -- prepare bio unless that sector
* is already covered by previously prepared bios */
static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
@@ -546,13 +548,20 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
{
struct bio *bio;
struct page *page;
- sector_t on_disk_sector = enr + mdev->ldev->md.md_offset
- + mdev->ldev->md.bm_offset;
+ sector_t on_disk_sector;
unsigned int page_offset = PAGE_SIZE;
int offset;
int i = 0;
int err = -ENOMEM;
+ /* We always write aligned, full 4k blocks,
+ * so we can ignore the logical_block_size (for now) */
+ enr &= ~7U;
+ on_disk_sector = enr + mdev->ldev->md.md_offset
+ + mdev->ldev->md.bm_offset;
+
+ D_ASSERT(!(on_disk_sector & 7U));
+
/* Check if that enr is already covered by an already created bio.
* Caution, bios[] is not NULL terminated,
* but only initialized to all NULL.
@@ -588,7 +597,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
offset = S2W(enr);
drbd_bm_get_lel(mdev, offset,
- min_t(size_t, S2W(1), drbd_bm_words(mdev) - offset),
+ min_t(size_t, S2W(8), drbd_bm_words(mdev) - offset),
kmap(page) + page_offset);
kunmap(page);
@@ -597,7 +606,7 @@ static int atodb_prepare_unless_covered(struct drbd_conf *mdev,
bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector;
- if (bio_add_page(bio, page, MD_SECTOR_SIZE, page_offset) != MD_SECTOR_SIZE)
+ if (bio_add_page(bio, page, 4096, page_offset) != 4096)
goto out_put_page;
atomic_inc(&wc->count);
@@ -1327,7 +1336,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
/* ok, ->resync is there. */
for (i = 0; i < mdev->resync->nr_elements; i++) {
e = lc_element_by_index(mdev->resync, i);
- bm_ext = e ? lc_entry(e, struct bm_extent, lce) : NULL;
+ bm_ext = lc_entry(e, struct bm_extent, lce);
if (bm_ext->lce.lc_number == LC_FREE)
continue;
if (bm_ext->lce.lc_number == mdev->resync_wenr) {
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 3d6f3d988949..3390716898d5 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -67,7 +67,7 @@ struct drbd_bitmap {
size_t bm_words;
size_t bm_number_of_pages;
sector_t bm_dev_capacity;
- struct semaphore bm_change; /* serializes resize operations */
+ struct mutex bm_change; /* serializes resize operations */
atomic_t bm_async_io;
wait_queue_head_t bm_io_wait;
@@ -115,7 +115,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
return;
}
- trylock_failed = down_trylock(&b->bm_change);
+ trylock_failed = !mutex_trylock(&b->bm_change);
if (trylock_failed) {
dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
@@ -126,7 +126,7 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
b->bm_task == mdev->receiver.task ? "receiver" :
b->bm_task == mdev->asender.task ? "asender" :
b->bm_task == mdev->worker.task ? "worker" : "?");
- down(&b->bm_change);
+ mutex_lock(&b->bm_change);
}
if (__test_and_set_bit(BM_LOCKED, &b->bm_flags))
dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
@@ -148,7 +148,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
b->bm_why = NULL;
b->bm_task = NULL;
- up(&b->bm_change);
+ mutex_unlock(&b->bm_change);
}
/* word offset to long pointer */
@@ -296,7 +296,7 @@ int drbd_bm_init(struct drbd_conf *mdev)
if (!b)
return -ENOMEM;
spin_lock_init(&b->bm_lock);
- init_MUTEX(&b->bm_change);
+ mutex_init(&b->bm_change);
init_waitqueue_head(&b->bm_io_wait);
mdev->bitmap = b;
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index d9301e861d9f..e5e86a781820 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -261,6 +261,9 @@ static inline const char *cmdname(enum drbd_packets cmd)
[P_OV_REQUEST] = "OVRequest",
[P_OV_REPLY] = "OVReply",
[P_OV_RESULT] = "OVResult",
+ [P_CSUM_RS_REQUEST] = "CsumRSRequest",
+ [P_RS_IS_IN_SYNC] = "CsumRSIsInSync",
+ [P_COMPRESSED_BITMAP] = "CBitmap",
[P_MAX_CMD] = NULL,
};
@@ -443,13 +446,18 @@ struct p_rs_param_89 {
char csums_alg[SHARED_SECRET_MAX];
} __packed;
+enum drbd_conn_flags {
+ CF_WANT_LOSE = 1,
+ CF_DRY_RUN = 2,
+};
+
struct p_protocol {
struct p_header head;
u32 protocol;
u32 after_sb_0p;
u32 after_sb_1p;
u32 after_sb_2p;
- u32 want_lose;
+ u32 conn_flags;
u32 two_primaries;
/* Since protocol version 87 and higher. */
@@ -791,6 +799,8 @@ enum {
* while this is set. */
RESIZE_PENDING, /* Size change detected locally, waiting for the response from
* the peer, if it changed there as well. */
+ CONN_DRY_RUN, /* Expect disconnect after resync handshake. */
+ GOT_PING_ACK, /* set when we receive a ping_ack packet, misc wait gets woken */
};
struct drbd_bitmap; /* opaque for drbd_conf */
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index ab871e00ffc5..67e0fc542249 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1668,7 +1668,7 @@ int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
int drbd_send_protocol(struct drbd_conf *mdev)
{
struct p_protocol *p;
- int size, rv;
+ int size, cf, rv;
size = sizeof(struct p_protocol);
@@ -1685,9 +1685,21 @@ int drbd_send_protocol(struct drbd_conf *mdev)
p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
- p->want_lose = cpu_to_be32(mdev->net_conf->want_lose);
p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
+ cf = 0;
+ if (mdev->net_conf->want_lose)
+ cf |= CF_WANT_LOSE;
+ if (mdev->net_conf->dry_run) {
+ if (mdev->agreed_pro_version >= 92)
+ cf |= CF_DRY_RUN;
+ else {
+ dev_err(DEV, "--dry-run is not supported by peer");
+ return 0;
+ }
+ }
+ p->conn_flags = cpu_to_be32(cf);
+
if (mdev->agreed_pro_version >= 87)
strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
@@ -3161,14 +3173,18 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
void drbd_free_sock(struct drbd_conf *mdev)
{
if (mdev->data.socket) {
+ mutex_lock(&mdev->data.mutex);
kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
sock_release(mdev->data.socket);
mdev->data.socket = NULL;
+ mutex_unlock(&mdev->data.mutex);
}
if (mdev->meta.socket) {
+ mutex_lock(&mdev->meta.mutex);
kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
sock_release(mdev->meta.socket);
mdev->meta.socket = NULL;
+ mutex_unlock(&mdev->meta.mutex);
}
}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 4df3b40b1057..6429d2b19e06 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -285,8 +285,8 @@ int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
}
if (r == SS_NO_UP_TO_DATE_DISK && force &&
- (mdev->state.disk == D_INCONSISTENT ||
- mdev->state.disk == D_OUTDATED)) {
+ (mdev->state.disk < D_UP_TO_DATE &&
+ mdev->state.disk >= D_INCONSISTENT)) {
mask.disk = D_MASK;
val.disk = D_UP_TO_DATE;
forced = 1;
@@ -407,7 +407,7 @@ static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
}
reply->ret_code =
- drbd_set_role(mdev, R_PRIMARY, primary_args.overwrite_peer);
+ drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
return 0;
}
@@ -941,6 +941,25 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
drbd_md_set_sector_offsets(mdev, nbc);
+ /* allocate a second IO page if logical_block_size != 512 */
+ logical_block_size = bdev_logical_block_size(nbc->md_bdev);
+ if (logical_block_size == 0)
+ logical_block_size = MD_SECTOR_SIZE;
+
+ if (logical_block_size != MD_SECTOR_SIZE) {
+ if (!mdev->md_io_tmpp) {
+ struct page *page = alloc_page(GFP_NOIO);
+ if (!page)
+ goto force_diskless_dec;
+
+ dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
+ logical_block_size, MD_SECTOR_SIZE);
+ dev_warn(DEV, "Workaround engaged (has performance impact).\n");
+
+ mdev->md_io_tmpp = page;
+ }
+ }
+
if (!mdev->bitmap) {
if (drbd_bm_init(mdev)) {
retcode = ERR_NOMEM;
@@ -980,25 +999,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
goto force_diskless_dec;
}
- /* allocate a second IO page if logical_block_size != 512 */
- logical_block_size = bdev_logical_block_size(nbc->md_bdev);
- if (logical_block_size == 0)
- logical_block_size = MD_SECTOR_SIZE;
-
- if (logical_block_size != MD_SECTOR_SIZE) {
- if (!mdev->md_io_tmpp) {
- struct page *page = alloc_page(GFP_NOIO);
- if (!page)
- goto force_diskless_dec;
-
- dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
- logical_block_size, MD_SECTOR_SIZE);
- dev_warn(DEV, "Workaround engaged (has performance impact).\n");
-
- mdev->md_io_tmpp = page;
- }
- }
-
/* Reset the "barriers don't work" bits here, then force meta data to
* be written, to ensure we determine if barriers are supported. */
if (nbc->dc.no_md_flush)
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index d065c646b35a..ed9f1de24a71 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2513,6 +2513,10 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
}
if (hg == -100) {
+ /* FIXME this log message is not correct if we end up here
+ * after an attempted attach on a diskless node.
+ * We just refuse to attach -- well, we drop the "connection"
+ * to that disk, in a way... */
dev_alert(DEV, "Split-Brain detected, dropping connection!\n");
drbd_khelper(mdev, "split-brain");
return C_MASK;
@@ -2538,6 +2542,16 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
}
}
+ if (mdev->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
+ if (hg == 0)
+ dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
+ else
+ dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
+ drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
+ abs(hg) >= 2 ? "full" : "bit-map based");
+ return C_MASK;
+ }
+
if (abs(hg) >= 2) {
dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake"))
@@ -2585,7 +2599,7 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
struct p_protocol *p = (struct p_protocol *)h;
int header_size, data_size;
int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
- int p_want_lose, p_two_primaries;
+ int p_want_lose, p_two_primaries, cf;
char p_integrity_alg[SHARED_SECRET_MAX] = "";
header_size = sizeof(*p) - sizeof(*h);
@@ -2598,8 +2612,14 @@ static int receive_protocol(struct drbd_conf *mdev, struct p_header *h)
p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
- p_want_lose = be32_to_cpu(p->want_lose);
p_two_primaries = be32_to_cpu(p->two_primaries);
+ cf = be32_to_cpu(p->conn_flags);
+ p_want_lose = cf & CF_WANT_LOSE;
+
+ clear_bit(CONN_DRY_RUN, &mdev->flags);
+
+ if (cf & CF_DRY_RUN)
+ set_bit(CONN_DRY_RUN, &mdev->flags);
if (p_proto != mdev->net_conf->wire_protocol) {
dev_err(DEV, "incompatible communication protocols\n");
@@ -3118,13 +3138,16 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h)
put_ldev(mdev);
if (nconn == C_MASK) {
+ nconn = C_CONNECTED;
if (mdev->state.disk == D_NEGOTIATING) {
drbd_force_state(mdev, NS(disk, D_DISKLESS));
- nconn = C_CONNECTED;
} else if (peer_state.disk == D_NEGOTIATING) {
dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
peer_state.disk = D_DISKLESS;
+ real_peer_disk = D_DISKLESS;
} else {
+ if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
+ return FALSE;
D_ASSERT(oconn == C_WF_REPORT_PARAMS);
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
return FALSE;
@@ -3594,10 +3617,7 @@ static void drbd_disconnect(struct drbd_conf *mdev)
/* asender does not clean up anything. it must not interfere, either */
drbd_thread_stop(&mdev->asender);
-
- mutex_lock(&mdev->data.mutex);
drbd_free_sock(mdev);
- mutex_unlock(&mdev->data.mutex);
spin_lock_irq(&mdev->req_lock);
_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
@@ -4054,6 +4074,8 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h)
{
/* restore idle timeout */
mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ;
+ if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
+ wake_up(&mdev->misc_wait);
return TRUE;
}
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index b453c2bca3be..44bf6d11197e 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -938,7 +938,8 @@ int w_e_end_csum_rs_req(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
if (eq) {
drbd_set_in_sync(mdev, e->sector, e->size);
- mdev->rs_same_csum++;
+ /* rs_same_csums unit is BM_BLOCK_SIZE */
+ mdev->rs_same_csum += e->size >> BM_BLOCK_SHIFT;
ok = drbd_send_ack(mdev, P_RS_IS_IN_SYNC, e);
} else {
inc_rs_pending(mdev);
@@ -1288,6 +1289,14 @@ int drbd_alter_sa(struct drbd_conf *mdev, int na)
return retcode