summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/amiflop.c2
-rw-r--r--drivers/block/aoe/aoe.h4
-rw-r--r--drivers/block/aoe/aoeblk.c1
-rw-r--r--drivers/block/aoe/aoecmd.c27
-rw-r--r--drivers/block/aoe/aoedev.c11
-rw-r--r--drivers/block/aoe/aoemain.c2
-rw-r--r--drivers/block/ataflop.c26
-rw-r--r--drivers/block/drbd/drbd_debugfs.c2
-rw-r--r--drivers/block/drbd/drbd_int.h19
-rw-r--r--drivers/block/drbd/drbd_main.c30
-rw-r--r--drivers/block/drbd/drbd_nl.c133
-rw-r--r--drivers/block/drbd/drbd_protocol.h47
-rw-r--r--drivers/block/drbd/drbd_receiver.c253
-rw-r--r--drivers/block/drbd/drbd_req.c19
-rw-r--r--drivers/block/drbd/drbd_req.h2
-rw-r--r--drivers/block/drbd/drbd_state.c11
-rw-r--r--drivers/block/drbd/drbd_state.h5
-rw-r--r--drivers/block/drbd/drbd_worker.c2
-rw-r--r--drivers/block/floppy.c6
-rw-r--r--drivers/block/loop.c416
-rw-r--r--drivers/block/loop.h1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c226
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h48
-rw-r--r--drivers/block/nbd.c3
-rw-r--r--drivers/block/null_blk.h1
-rw-r--r--drivers/block/null_blk_main.c21
-rw-r--r--drivers/block/null_blk_zoned.c27
-rw-r--r--drivers/block/paride/pd.c30
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/skd_main.c16
-rw-r--r--drivers/block/sunvdc.c154
-rw-r--r--drivers/block/swim3.c14
-rw-r--r--drivers/block/sx8.c434
-rw-r--r--drivers/block/umem.c3
-rw-r--r--drivers/block/virtio_blk.c17
-rw-r--r--drivers/block/zram/Kconfig5
-rw-r--r--drivers/block/zram/zram_drv.c502
-rw-r--r--drivers/block/zram/zram_drv.h19
38 files changed, 1519 insertions, 1022 deletions
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index bf996bd44cfc..0903e0803ec8 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1601,8 +1601,6 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode,
return p->type->read_size;
#endif
default:
- printk(KERN_DEBUG "fd_ioctl: unknown cmd %d for drive %d.",
- cmd, drive);
return -ENOSYS;
}
return 0;
diff --git a/drivers/block/aoe/aoe.h b/drivers/block/aoe/aoe.h
index 7ca76ed2e71a..84d0fcebd6af 100644
--- a/drivers/block/aoe/aoe.h
+++ b/drivers/block/aoe/aoe.h
@@ -100,6 +100,10 @@ enum {
MAX_TAINT = 1000, /* cap on aoetgt taint */
};
+struct aoe_req {
+ unsigned long nr_bios;
+};
+
struct buf {
ulong nframesout;
struct bio *bio;
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ed26b7287256..e2c6aae2d636 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -387,6 +387,7 @@ aoeblk_gdalloc(void *vp)
set = &d->tag_set;
set->ops = &aoeblk_mq_ops;
+ set->cmd_size = sizeof(struct aoe_req);
set->nr_hw_queues = 1;
set->queue_depth = 128;
set->numa_node = NUMA_NO_NODE;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index bb2fba651bd2..3cf9bc5d8d95 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -822,17 +822,6 @@ out:
spin_unlock_irqrestore(&d->lock, flags);
}
-static unsigned long
-rqbiocnt(struct request *r)
-{
- struct bio *bio;
- unsigned long n = 0;
-
- __rq_for_each_bio(bio, r)
- n++;
- return n;
-}
-
static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
@@ -847,6 +836,7 @@ nextbuf(struct aoedev *d)
{
struct request *rq;
struct request_queue *q;
+ struct aoe_req *req;
struct buf *buf;
struct bio *bio;
@@ -865,7 +855,11 @@ nextbuf(struct aoedev *d)
blk_mq_start_request(rq);
d->ip.rq = rq;
d->ip.nxbio = rq->bio;
- rq->special = (void *) rqbiocnt(rq);
+
+ req = blk_mq_rq_to_pdu(rq);
+ req->nr_bios = 0;
+ __rq_for_each_bio(bio, rq)
+ req->nr_bios++;
}
buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
if (buf == NULL) {
@@ -1069,16 +1063,13 @@ aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
static void
aoe_end_buf(struct aoedev *d, struct buf *buf)
{
- struct request *rq;
- unsigned long n;
+ struct request *rq = buf->rq;
+ struct aoe_req *req = blk_mq_rq_to_pdu(rq);
if (buf == d->ip.buf)
d->ip.buf = NULL;
- rq = buf->rq;
mempool_free(buf, d->bufpool);
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
- if (n == 0)
+ if (--req->nr_bios == 0)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c
index 9063f8efbd3b..5b49f1b33ebe 100644
--- a/drivers/block/aoe/aoedev.c
+++ b/drivers/block/aoe/aoedev.c
@@ -160,21 +160,22 @@ static void
aoe_failip(struct aoedev *d)
{
struct request *rq;
+ struct aoe_req *req;
struct bio *bio;
- unsigned long n;
aoe_failbuf(d, d->ip.buf);
-
rq = d->ip.rq;
if (rq == NULL)
return;
+
+ req = blk_mq_rq_to_pdu(rq);
while ((bio = d->ip.nxbio)) {
bio->bi_status = BLK_STS_IOERR;
d->ip.nxbio = bio->bi_next;
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
+ req->nr_bios--;
}
- if ((unsigned long) rq->special == 0)
+
+ if (!req->nr_bios)
aoe_end_request(d, rq, 0);
}
diff --git a/drivers/block/aoe/aoemain.c b/drivers/block/aoe/aoemain.c
index 251482066977..1e4e2971171c 100644
--- a/drivers/block/aoe/aoemain.c
+++ b/drivers/block/aoe/aoemain.c
@@ -24,7 +24,7 @@ static void discover_timer(struct timer_list *t)
aoecmd_cfg(0xffff, 0xff);
}
-static void
+static void __exit
aoe_exit(void)
{
del_timer_sync(&timer);
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index f88b4c26d422..b0dbbdfeb33e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1471,6 +1471,15 @@ static void setup_req_params( int drive )
ReqTrack, ReqSector, (unsigned long)ReqData ));
}
+static void ataflop_commit_rqs(struct blk_mq_hw_ctx *hctx)
+{
+ spin_lock_irq(&ataflop_lock);
+ atari_disable_irq(IRQ_MFP_FDC);
+ finish_fdc();
+ atari_enable_irq(IRQ_MFP_FDC);
+ spin_unlock_irq(&ataflop_lock);
+}
+
static blk_status_t ataflop_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
@@ -1947,6 +1956,7 @@ static const struct block_device_operations floppy_fops = {
static const struct blk_mq_ops ataflop_mq_ops = {
.queue_rq = ataflop_queue_rq,
+ .commit_rqs = ataflop_commit_rqs,
};
static struct kobject *floppy_find(dev_t dev, int *part, void *data)
@@ -1982,6 +1992,7 @@ static int __init atari_floppy_init (void)
&ataflop_mq_ops, 2,
BLK_MQ_F_SHOULD_MERGE);
if (IS_ERR(unit[i].disk->queue)) {
+ put_disk(unit[i].disk);
ret = PTR_ERR(unit[i].disk->queue);
unit[i].disk->queue = NULL;
goto err;
@@ -2033,18 +2044,13 @@ static int __init atari_floppy_init (void)
return 0;
err:
- do {
+ while (--i >= 0) {
struct gendisk *disk = unit[i].disk;
- if (disk) {
- if (disk->queue) {
- blk_cleanup_queue(disk->queue);
- disk->queue = NULL;
- }
- blk_mq_free_tag_set(&unit[i].tag_set);
- put_disk(unit[i].disk);
- }
- } while (i--);
+ blk_cleanup_queue(disk->queue);
+ blk_mq_free_tag_set(&unit[i].tag_set);
+ put_disk(unit[i].disk);
+ }
unregister_blkdev(FLOPPY_MAJOR, "fd");
return ret;
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index 5d5e8d6a8a56..f13b48ff5f43 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -237,6 +237,8 @@ static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_re
seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
+ seq_print_rq_state_bit(m, f & EE_TRIM, &sep, "trim");
+ seq_print_rq_state_bit(m, f & EE_ZEROOUT, &sep, "zero-out");
seq_print_rq_state_bit(m, f & EE_WRITE_SAME, &sep, "write-same");
seq_putc(m, '\n');
}
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 1e47db57b9d2..000a2f4c0e92 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -430,7 +430,11 @@ enum {
__EE_MAY_SET_IN_SYNC,
/* is this a TRIM aka REQ_OP_DISCARD? */
- __EE_IS_TRIM,
+ __EE_TRIM,
+ /* explicit zero-out requested, or
+ * our lower level cannot handle trim,
+ * and we want to fall back to zeroout instead */
+ __EE_ZEROOUT,
/* In case a barrier failed,
* we need to resubmit without the barrier flag. */
@@ -472,7 +476,8 @@ enum {
};
#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
-#define EE_IS_TRIM (1<<__EE_IS_TRIM)
+#define EE_TRIM (1<<__EE_TRIM)
+#define EE_ZEROOUT (1<<__EE_ZEROOUT)
#define EE_RESUBMITTED (1<<__EE_RESUBMITTED)
#define EE_WAS_ERROR (1<<__EE_WAS_ERROR)
#define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST)
@@ -1556,6 +1561,8 @@ extern void start_resync_timer_fn(struct timer_list *t);
extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
/* drbd_receiver.c */
+extern int drbd_issue_discard_or_zero_out(struct drbd_device *device,
+ sector_t start, unsigned int nr_sectors, int flags);
extern int drbd_receiver(struct drbd_thread *thi);
extern int drbd_ack_receiver(struct drbd_thread *thi);
extern void drbd_send_ping_wf(struct work_struct *ws);
@@ -1609,13 +1616,7 @@ static inline void drbd_tcp_quickack(struct socket *sock)
}
/* sets the number of 512 byte sectors of our virtual device */
-static inline void drbd_set_my_capacity(struct drbd_device *device,
- sector_t size)
-{
- /* set_capacity(device->this_bdev->bd_disk, size); */
- set_capacity(device->vdisk, size);
- device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
-}
+void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
/*
* used to submit our private bio
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index fa8204214ac0..714eb64fabfd 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1668,7 +1668,11 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection,
(bio->bi_opf & REQ_PREFLUSH ? DP_FLUSH : 0) |
(bio_op(bio) == REQ_OP_WRITE_SAME ? DP_WSAME : 0) |
(bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0) |
- (bio_op(bio) == REQ_OP_WRITE_ZEROES ? DP_DISCARD : 0);
+ (bio_op(bio) == REQ_OP_WRITE_ZEROES ?
+ ((connection->agreed_features & DRBD_FF_WZEROES) ?
+ (DP_ZEROES |(!(bio->bi_opf & REQ_NOUNMAP) ? DP_DISCARD : 0))
+ : DP_DISCARD)
+ : 0);
else
return bio->bi_opf & REQ_SYNC ? DP_RW_SYNC : 0;
}
@@ -1712,10 +1716,11 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
}
p->dp_flags = cpu_to_be32(dp_flags);
- if (dp_flags & DP_DISCARD) {
+ if (dp_flags & (DP_DISCARD|DP_ZEROES)) {
+ enum drbd_packet cmd = (dp_flags & DP_ZEROES) ? P_ZEROES : P_TRIM;
struct p_trim *t = (struct p_trim*)p;
t->size = cpu_to_be32(req->i.size);
- err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0);
+ err = __send_command(peer_device->connection, device->vnr, sock, cmd, sizeof(*t), NULL, 0);
goto out;
}
if (dp_flags & DP_WSAME) {
@@ -2034,6 +2039,21 @@ void drbd_init_set_defaults(struct drbd_device *device)
device->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
}
+static void _drbd_set_my_capacity(struct drbd_device *device, sector_t size)
+{
+ /* set_capacity(device->this_bdev->bd_disk, size); */
+ set_capacity(device->vdisk, size);
+ device->this_bdev->bd_inode->i_size = (loff_t)size << 9;
+}
+
+void drbd_set_my_capacity(struct drbd_device *device, sector_t size)
+{
+ char ppb[10];
+ _drbd_set_my_capacity(device, size);
+ drbd_info(device, "size = %s (%llu KB)\n",
+ ppsize(ppb, size>>1), (unsigned long long)size>>1);
+}
+
void drbd_device_cleanup(struct drbd_device *device)
{
int i;
@@ -2059,7 +2079,7 @@ void drbd_device_cleanup(struct drbd_device *device)
}
D_ASSERT(device, first_peer_device(device)->connection->net_conf == NULL);
- drbd_set_my_capacity(device, 0);
+ _drbd_set_my_capacity(device, 0);
if (device->bitmap) {
/* maybe never allocated. */
drbd_bm_resize(device, 0, 1);
@@ -2792,7 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
drbd_init_set_defaults(device);
- q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, &resource->req_lock);
+ q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE);
if (!q)
goto out_no_q;
device->rq_queue = q;
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index d15703b1ffe8..f2471172a961 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -127,6 +127,35 @@ static int drbd_msg_put_info(struct sk_buff *skb, const char *info)
return 0;
}
+__printf(2, 3)
+static int drbd_msg_sprintf_info(struct sk_buff *skb, const char *fmt, ...)
+{
+ va_list args;
+ struct nlattr *nla, *txt;
+ int err = -EMSGSIZE;
+ int len;
+
+ nla = nla_nest_start(skb, DRBD_NLA_CFG_REPLY);
+ if (!nla)
+ return err;
+
+ txt = nla_reserve(skb, T_info_text, 256);
+ if (!txt) {
+ nla_nest_cancel(skb, nla);
+ return err;
+ }
+ va_start(args, fmt);
+ len = vscnprintf(nla_data(txt), 256, fmt, args);
+ va_end(args);
+
+ /* maybe: retry with larger reserve, if truncated */
+ txt->nla_len = nla_attr_size(len+1);
+ nlmsg_trim(skb, (char*)txt + NLA_ALIGN(txt->nla_len));
+ nla_nest_end(skb, nla);
+
+ return 0;
+}
+
/* This would be a good candidate for a "pre_doit" hook,
* and per-family private info->pointers.
* But we need to stay compatible with older kernels.
@@ -668,14 +697,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for
if (rv == SS_TWO_PRIMARIES) {
/* Maybe the peer is detected as dead very soon...
retry at most once more in this case. */
- int timeo;
- rcu_read_lock();
- nc = rcu_dereference(connection->net_conf);
- timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
- rcu_read_unlock();
- schedule_timeout_interruptible(timeo);
- if (try < max_tries)
+ if (try < max_tries) {
+ int timeo;
try = max_tries - 1;
+ rcu_read_lock();
+ nc = rcu_dereference(connection->net_conf);
+ timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
+ rcu_read_unlock();
+ schedule_timeout_interruptible(timeo);
+ }
continue;
}
if (rv < SS_SUCCESS) {
@@ -921,7 +951,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
} prev;
sector_t u_size, size;
struct drbd_md *md = &device->ldev->md;
- char ppb[10];
void *buffer;
int md_moved, la_size_changed;
@@ -999,8 +1028,6 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
/* racy, see comments above. */
drbd_set_my_capacity(device, size);
md->la_size_sect = size;
- drbd_info(device, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
- (unsigned long long)size>>1);
}
if (rv <= DS_ERROR)
goto err_out;
@@ -1234,6 +1261,21 @@ static void fixup_discard_if_not_supported(struct request_queue *q)
}
}
+static void fixup_write_zeroes(struct drbd_device *device, struct request_queue *q)
+{
+ /* Fixup max_write_zeroes_sectors after blk_queue_stack_limits():
+ * if we can handle "zeroes" efficiently on the protocol,
+ * we want to do that, even if our backend does not announce
+ * max_write_zeroes_sectors itself. */
+ struct drbd_connection *connection = first_peer_device(device)->connection;
+ /* If the peer announces WZEROES support, use it. Otherwise, rather
+ * send explicit zeroes than rely on some discard-zeroes-data magic. */
+ if (connection->agreed_features & DRBD_FF_WZEROES)
+ q->limits.max_write_zeroes_sectors = DRBD_MAX_BBIO_SECTORS;
+ else
+ q->limits.max_write_zeroes_sectors = 0;
+}
+
static void decide_on_write_same_support(struct drbd_device *device,
struct request_queue *q,
struct request_queue *b, struct o_qlim *o,
@@ -1344,6 +1386,7 @@ static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backi
}
}
fixup_discard_if_not_supported(q);
+ fixup_write_zeroes(device, q);
}
void drbd_reconsider_queue_parameters(struct drbd_device *device, struct drbd_backing_dev *bdev, struct o_qlim *o)
@@ -1514,6 +1557,30 @@ static void sanitize_disk_conf(struct drbd_device *device, struct disk_conf *dis
}
}
+static int disk_opts_check_al_size(struct drbd_device *device, struct disk_conf *dc)
+{
+ int err = -EBUSY;
+
+ if (device->act_log &&
+ device->act_log->nr_elements == dc->al_extents)
+ return 0;
+
+ drbd_suspend_io(device);
+ /* If IO completion is currently blocked, we would likely wait
+ * "forever" for the activity log to become unused. So we don't. */
+ if (atomic_read(&device->ap_bio_cnt))
+ goto out;
+
+ wait_event(device->al_wait, lc_try_lock(device->act_log));
+ drbd_al_shrink(device);
+ err = drbd_check_al_size(device, dc);
+ lc_unlock(device->act_log);
+ wake_up(&device->al_wait);
+out:
+ drbd_resume_io(device);
+ return err;
+}
+
int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
{
struct drbd_config_context adm_ctx;
@@ -1576,15 +1643,12 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
}
}
- drbd_suspend_io(device);
- wait_event(device->al_wait, lc_try_lock(device->act_log));
- drbd_al_shrink(device);
- err = drbd_check_al_size(device, new_disk_conf);
- lc_unlock(device->act_log);
- wake_up(&device->al_wait);
- drbd_resume_io(device);
-
+ err = disk_opts_check_al_size(device, new_disk_conf);
if (err) {
+ /* Could be just "busy". Ignore?
+ * Introduce dedicated error code? */
+ drbd_msg_put_info(adm_ctx.reply_skb,
+ "Try again without changing current al-extents setting");
retcode = ERR_NOMEM;
goto fail_unlock;
}
@@ -1934,9 +1998,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
}
}
- if (device->state.conn < C_CONNECTED &&
- device->state.role == R_PRIMARY && device->ed_uuid &&
- (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
+ if (device->state.pdsk != D_UP_TO_DATE && device->ed_uuid &&
+ (device->state.role == R_PRIMARY || device->state.peer == R_PRIMARY) &&
+ (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
drbd_err(device, "Can only attach to data with current UUID=%016llX\n",
(unsigned long long)device->ed_uuid);
retcode = ERR_DATA_NOT_CURRENT;
@@ -1950,11 +2014,21 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
}
/* Prevent shrinking of consistent devices ! */
- if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
- drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0) < nbc->md.la_size_sect) {
- drbd_warn(device, "refusing to truncate a consistent device\n");
- retcode = ERR_DISK_TOO_SMALL;
- goto force_diskless_dec;
+ {
+ unsigned long long nsz = drbd_new_dev_size(device, nbc, nbc->disk_conf->disk_size, 0);
+ unsigned long long eff = nbc->md.la_size_sect;
+ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && nsz < eff) {
+ if (nsz == nbc->disk_conf->disk_size) {
+ drbd_warn(device, "truncating a consistent device during attach (%llu < %llu)\n", nsz, eff);
+ } else {
+ drbd_warn(device, "refusing to truncate a consistent device (%llu < %llu)\n", nsz, eff);
+ drbd_msg_sprintf_info(adm_ctx.reply_skb,
+ "To-be-attached device has last effective > current size, and is consistent\n"
+ "(%llu > %llu sectors). Refusing to attach.", eff, nsz);
+ retcode = ERR_IMPLICIT_SHRINK;
+ goto force_diskless_dec;
+ }
+ }
}
lock_all_resources();
@@ -2654,8 +2728,10 @@ out:
static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection, bool force)
{
+ enum drbd_conns cstate;
enum drbd_state_rv rv;
+repeat:
rv = conn_request_state(connection, NS(conn, C_DISCONNECTING),
force ? CS_HARD : 0);
@@ -2673,6 +2749,11 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection
break;
case SS_CW_FAILED_BY_PEER:
+ spin_lock_irq(&connection->resource->req_lock);
+ cstate = connection->cstate;
+ spin_unlock_irq(&connection->resource->req_lock);
+ if (cstate <= C_WF_CONNECTION)
+ goto repeat;
/* The peer probably wants to see us outdated. */
rv = conn_request_state(connection, NS2(conn, C_DISCONNECTING,
disk, D_OUTDATED), 0);
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h
index 48dabbb21e11..e6fc5ad72501 100644
--- a/drivers/block/drbd/drbd_protocol.h
+++ b/drivers/block/drbd/drbd_protocol.h
@@ -70,6 +70,11 @@ enum drbd_packet {
* we may fall back to an opencoded loop instead. */
P_WSAME = 0x34,
+ /* 0x35 already claimed in DRBD 9 */
+ P_ZEROES = 0x36, /* data sock: zero-out, WRITE_ZEROES */
+
+ /* 0x40 .. 0x48 already claimed in DRBD 9 */
+
P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */
P_MAX_OPT_CMD = 0x101,
@@ -130,6 +135,12 @@ struct p_header100 {
#define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
#define DP_SEND_WRITE_ACK 256 /* This is a proto C write request */
#define DP_WSAME 512 /* equiv. REQ_WRITE_SAME */
+#define DP_ZEROES 1024 /* equiv. REQ_OP_WRITE_ZEROES */
+
+/* possible combinations:
+ * REQ_OP_WRITE_ZEROES: DP_DISCARD | DP_ZEROES
+ * REQ_OP_WRITE_ZEROES + REQ_NOUNMAP: DP_ZEROES
+ */
struct p_data {
u64 sector; /* 64 bits sector number */
@@ -197,6 +208,42 @@ struct p_block_req {
*/
#define DRBD_FF_WSAME 4
+/* supports REQ_OP_WRITE_ZEROES on the "wire" protocol.
+ *
+ * We used to map that to "discard" on the sending side, and if we cannot
+ * guarantee that discard zeroes data, the receiving side would map discard
+ * back to zero-out.
+ *
+ * With the introduction of REQ_OP_WRITE_ZEROES,
+ * we started to use that for both WRITE_ZEROES and DISCARDS,
+ * hoping that WRITE_ZEROES would "do what we want",
+ * UNMAP if possible, zero-out the rest.
+ *
+ * The example scenario is some LVM "thin" backend.
+ *
+ * While an un-allocated block on dm-thin reads as zeroes, on a dm-thin
+ * with "skip_block_zeroing=true", after a partial block write allocated
+ * that block, that same block may well map "undefined old garbage" from
+ * the backends on LBAs that have not yet been written to.
+ *
+ * If we cannot distinguish between zero-out and discard on the receiving
+ * side, to avoid "undefined old garbage" to pop up randomly at later times
+ * on supposedly zero-initialized blocks, we'd need to map all discards to
+ * zero-out on the receiving side. But that would potentially do a full
+ * alloc on thinly provisioned backends, even when the expectation was to
+ * unmap/trim/discard/de-allocate.
+ *
+ * We need to distinguish on the protocol level, whether we need to guarantee
+ * zeroes (and thus use zero-out, potentially doing the mentioned full-alloc),
+ * or if we want to put the emphasis on discard, and only do a "best effort
+ * zeroing" (by "discarding" blocks aligned to discard-granularity, and zeroing
+ * only potential unaligned head and tail clippings), to at least *try* to
+ * avoid "false positives" in an online-verify later, hoping that someone
+ * set skip_block_zeroing=false.
+ */
+#define DRBD_FF_WZEROES 8
+
+
struct p_connection_features {
u32 protocol_min;
u32 feature_flags;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 61c392752fe4..c7ad88d91a09 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -50,7 +50,7 @@
#include "drbd_req.h"
#include "drbd_vli.h"
-#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME)
+#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME|DRBD_FF_WZEROES)
struct packet_info {
enum drbd_packet cmd;
@@ -1490,14 +1490,129 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
-static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
+/*
+ * Mapping "discard" to ZEROOUT with UNMAP does not work for us:
+ * Drivers have to "announce" q->limits.max_write_zeroes_sectors, or it
+ * will directly go to fallback mode, submitting normal writes, and
+ * never even try to UNMAP.
+ *
+ * And dm-thin does not do this (yet), mostly because in general it has
+ * to assume that "skip_block_zeroing" is set. See also:
+ * https://www.mail-archive.com/dm-devel%40redhat.com/msg07965.html
+ * https://www.redhat.com/archives/dm-devel/2018-January/msg00271.html
+ *
+ * We *may* ignore the discard-zeroes-data setting, if so configured.
+ *
+ * Assumption is that this "discard_zeroes_data=0" is only because the backend
+ * may ignore partial unaligned discards.
+ *
+ * LVM/DM thin as of at least
+ * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
+ * Library version: 1.02.93-RHEL7 (2015-01-28)
+ * Driver version: 4.29.0
+ * still behaves this way.
+ *
+ * For unaligned (wrt. alignment and granularity) or too small discards,
+ * we zero-out the initial (and/or) trailing unaligned partial chunks,
+ * but discard all the aligned full chunks.
+ *
+ * At least for LVM/DM thin, with skip_block_zeroing=false,
+ * the result is effectively "discard_zeroes_data=1".
+ */
+/* flags: EE_TRIM|EE_ZEROOUT */
+int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, int flags)
{
struct block_device *bdev = device->ldev->backing_bdev;
+ struct request_queue *q = bdev_get_queue(bdev);
+ sector_t tmp, nr;
+ unsigned int max_discard_sectors, granularity;
+ int alignment;
+ int err = 0;
- if (blkdev_issue_zeroout(bdev, peer_req->i.sector, peer_req->i.size >> 9,
- GFP_NOIO, 0))
- peer_req->flags |= EE_WAS_ERROR;
+ if ((flags & EE_ZEROOUT) || !(flags & EE_TRIM))
+ goto zero_out;
+
+ /* Zero-sector (unknown) and one-sector granularities are the same. */
+ granularity = max(q->limits.discard_granularity >> 9, 1U);
+ alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
+
+ max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));