diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-29 18:08:49 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-29 18:08:49 -0800 |
commit | e9f8ca0ae7b7bc9a032b429929431c626a69dd5e (patch) | |
tree | c29205e2f381879ae148b5ca9c23b4afa55ece05 /drivers/md | |
parent | 05ef8b97ddf9aed40df977477daeab01760d7f9a (diff) | |
parent | 47ace7e012b9f7ad71d43ac9063d335ea3d6820b (diff) |
Merge tag 'for-5.6/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Fix DM core's potential for q->make_request_fn NULL pointer in the
unlikely case that a DM device is created without a DM table and then
accessed due to upper-layer userspace code or user error.
- Fix DM thin-provisioning's metadata_pre_commit_callback to not use
memory after it is free'd. Also refactor code to disallow changing
the thin-pool's data device once in use -- doing so guarantees smae
lifetime of pool's data device relative to the pool metadata.
- Fix DM space maps used by DM thinp and DM cache to avoid reuse of a
already used block. This race was identified with extremely heavy
snapshot use in the context of DM thin provisioning.
- Fix DM raid's table status relative to an active rebuild.
- Fix DM crypt to use GFP_NOIO rather than GFP_NOFS in call to
skcipher_request_alloc(). Also fix benbi IV constructor crash if used
in authenticated mode.
- Add DM crypt support for Elephant diffuser to allow for Bitlocker
compatibility.
- Fix DM verity target to not prefetch hash blocks for data that has
already been verified.
- Fix DM writecache's incorrect flush sequence during commit when in
SSD mode.
- Improve DM writecache's sequential write performance on SSDs.
- Add DM zoned target support for zone sizes smaller than 128MiB.
- Add DM multipath 'queue_if_no_path_timeout_secs' module param to
allow timeout if path isn't reinstated. This allows users a kernel
safety-net against IO hanging indefinitely, due to no active paths,
that has historically only been provided by multipathd userspace.
- Various DM code cleanups to use true/false rather than 1/0, a
variable rename in dm-dust, and fix for a math error in comment for
DM thin metadata's ondisk format.
* tag 'for-5.6/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (21 commits)
dm: fix potential for q->make_request_fn NULL pointer
dm writecache: improve performance of large linear writes on SSDs
dm mpath: Add timeout mechanism for queue_if_no_path
dm thin: change data device's flush_bio to be member of struct pool
dm thin: don't allow changing data device during thin-pool reload
dm thin: fix use-after-free in metadata_pre_commit_callback
dm thin metadata: use pool locking at end of dm_pool_metadata_close
dm writecache: fix incorrect flush sequence when doing SSD mode commit
dm crypt: fix benbi IV constructor crash if used in authenticated mode
dm crypt: Implement Elephant diffuser for Bitlocker compatibility
dm space map common: fix to ensure new block isn't already in use
dm verity: don't prefetch hash blocks for already-verified data
dm crypt: fix GFP flags passed to skcipher_request_alloc()
dm thin metadata: Fix trivial math error in on-disk format documentation
dm thin metadata: use true/false for bool variable
dm snapshot: use true/false for bool variable
dm bio prison v2: use true/false for bool variable
dm mpath: use true/false for bool variable
dm zoned: support zone sizes smaller than 128MiB
dm raid: table line rebuild status fixes
...
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/dm-bio-prison-v2.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 335 | ||||
-rw-r--r-- | drivers/md/dm-dust.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 68 | ||||
-rw-r--r-- | drivers/md/dm-raid.c | 43 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 6 | ||||
-rw-r--r-- | drivers/md/dm-thin-metadata.c | 22 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 36 | ||||
-rw-r--r-- | drivers/md/dm-verity-target.c | 18 | ||||
-rw-r--r-- | drivers/md/dm-writecache.c | 71 | ||||
-rw-r--r-- | drivers/md/dm-zoned-metadata.c | 23 | ||||
-rw-r--r-- | drivers/md/dm.c | 9 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 27 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.h | 2 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-disk.c | 6 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 5 |
16 files changed, 578 insertions, 101 deletions
diff --git a/drivers/md/dm-bio-prison-v2.c b/drivers/md/dm-bio-prison-v2.c index 8ee019eda32d..9dec3b61cf70 100644 --- a/drivers/md/dm-bio-prison-v2.c +++ b/drivers/md/dm-bio-prison-v2.c @@ -324,7 +324,7 @@ static bool __unlock(struct dm_bio_prison_v2 *prison, bio_list_init(&cell->bios); if (cell->shared_count) { - cell->exclusive_lock = 0; + cell->exclusive_lock = false; return false; } diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eb9782fc93fe..c6a529873d0f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,8 +1,8 @@ /* * Copyright (C) 2003 Jana Saout <jana@saout.de> * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> - * Copyright (C) 2006-2017 Red Hat, Inc. All rights reserved. - * Copyright (C) 2013-2017 Milan Broz <gmazyland@gmail.com> + * Copyright (C) 2006-2020 Red Hat, Inc. All rights reserved. + * Copyright (C) 2013-2020 Milan Broz <gmazyland@gmail.com> * * This file is released under the GPL. */ @@ -115,6 +115,11 @@ struct iv_tcw_private { u8 *whitening; }; +#define ELEPHANT_MAX_KEY_SIZE 32 +struct iv_elephant_private { + struct crypto_skcipher *tfm; +}; + /* * Crypt: maps a linear range of a block device * and encrypts / decrypts at the same time. @@ -125,6 +130,7 @@ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cihper */ CRYPT_IV_LARGE_SECTORS, /* Calculate IV from sector_size, not 512B sectors */ + CRYPT_ENCRYPT_PREPROCESS, /* Must preprocess data for encryption (elephant) */ }; /* @@ -152,6 +158,7 @@ struct crypt_config { struct iv_benbi_private benbi; struct iv_lmk_private lmk; struct iv_tcw_private tcw; + struct iv_elephant_private elephant; } iv_gen_private; u64 iv_offset; unsigned int iv_size; @@ -285,6 +292,11 @@ static struct crypto_aead *any_tfm_aead(struct crypt_config *cc) * eboiv: Encrypted byte-offset IV (used in Bitlocker in CBC mode) * The IV is encrypted little-endian byte-offset (with the same key * and cipher as the volume). + * + * elephant: The extended version of eboiv with additional Elephant diffuser + * used with Bitlocker CBC mode. + * This mode was used in older Windows systems + * http://download.microsoft.com/download/0/2/3/0238acaf-d3bf-4a6d-b3d6-0a0be4bbb36e/bitlockercipher200608.pdf */ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, @@ -331,8 +343,14 @@ static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti, const char *opts) { - unsigned bs = crypto_skcipher_blocksize(any_tfm(cc)); - int log = ilog2(bs); + unsigned bs; + int log; + + if (test_bit(CRYPT_MODE_INTEGRITY_AEAD, &cc->cipher_flags)) + bs = crypto_aead_blocksize(any_tfm_aead(cc)); + else + bs = crypto_skcipher_blocksize(any_tfm(cc)); + log = ilog2(bs); /* we need to calculate how far we must shift the sector count * to get the cipher block count, we use this shift in _gen */ @@ -717,7 +735,7 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, struct crypto_wait wait; int err; - req = skcipher_request_alloc(any_tfm(cc), GFP_KERNEL | GFP_NOFS); + req = skcipher_request_alloc(any_tfm(cc), GFP_NOIO); if (!req) return -ENOMEM; @@ -734,6 +752,290 @@ static int crypt_iv_eboiv_gen(struct crypt_config *cc, u8 *iv, return err; } +static void crypt_iv_elephant_dtr(struct crypt_config *cc) +{ + struct iv_elephant_private *elephant = &cc->iv_gen_private.elephant; + + crypto_free_skcipher(elephant->tfm); + elephant->tfm = NULL; +} + +static int crypt_iv_elephant_ctr(struct crypt_config *cc, struct dm_target *ti, + const char *opts) +{ + struct iv_elephant_private *elephant = &cc->iv_gen_private.elephant; + int r; + + elephant->tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0); + if (IS_ERR(elephant->tfm)) { + r = PTR_ERR(elephant->tfm); + elephant->tfm = NULL; + return r; + } + + r = crypt_iv_eboiv_ctr(cc, ti, NULL); + if (r) + crypt_iv_elephant_dtr(cc); + return r; +} + +static void diffuser_disk_to_cpu(u32 *d, size_t n) +{ +#ifndef __LITTLE_ENDIAN + int i; + + for (i = 0; i < n; i++) + d[i] = le32_to_cpu((__le32)d[i]); +#endif +} + +static void diffuser_cpu_to_disk(__le32 *d, size_t n) +{ +#ifndef __LITTLE_ENDIAN + int i; + + for (i = 0; i < n; i++) + d[i] = cpu_to_le32((u32)d[i]); +#endif +} + +static void diffuser_a_decrypt(u32 *d, size_t n) +{ + int i, i1, i2, i3; + + for (i = 0; i < 5; i++) { + i1 = 0; + i2 = n - 2; + i3 = n - 5; + + while (i1 < (n - 1)) { + d[i1] += d[i2] ^ (d[i3] << 9 | d[i3] >> 23); + i1++; i2++; i3++; + + if (i3 >= n) + i3 -= n; + + d[i1] += d[i2] ^ d[i3]; + i1++; i2++; i3++; + + if (i2 >= n) + i2 -= n; + + d[i1] += d[i2] ^ (d[i3] << 13 | d[i3] >> 19); + i1++; i2++; i3++; + + d[i1] += d[i2] ^ d[i3]; + i1++; i2++; i3++; + } + } +} + +static void diffuser_a_encrypt(u32 *d, size_t n) +{ + int i, i1, i2, i3; + + for (i = 0; i < 5; i++) { + i1 = n - 1; + i2 = n - 2 - 1; + i3 = n - 5 - 1; + + while (i1 > 0) { + d[i1] -= d[i2] ^ d[i3]; + i1--; i2--; i3--; + + d[i1] -= d[i2] ^ (d[i3] << 13 | d[i3] >> 19); + i1--; i2--; i3--; + + if (i2 < 0) + i2 += n; + + d[i1] -= d[i2] ^ d[i3]; + i1--; i2--; i3--; + + if (i3 < 0) + i3 += n; + + d[i1] -= d[i2] ^ (d[i3] << 9 | d[i3] >> 23); + i1--; i2--; i3--; + } + } +} + +static void diffuser_b_decrypt(u32 *d, size_t n) +{ + int i, i1, i2, i3; + + for (i = 0; i < 3; i++) { + i1 = 0; + i2 = 2; + i3 = 5; + + while (i1 < (n - 1)) { + d[i1] += d[i2] ^ d[i3]; + i1++; i2++; i3++; + + d[i1] += d[i2] ^ (d[i3] << 10 | d[i3] >> 22); + i1++; i2++; i3++; + + if (i2 >= n) + i2 -= n; + + d[i1] += d[i2] ^ d[i3]; + i1++; i2++; i3++; + + if (i3 >= n) + i3 -= n; + + d[i1] += d[i2] ^ (d[i3] << 25 | d[i3] >> 7); + i1++; i2++; i3++; + } + } +} + +static void diffuser_b_encrypt(u32 *d, size_t n) +{ + int i, i1, i2, i3; + + for (i = 0; i < 3; i++) { + i1 = n - 1; + i2 = 2 - 1; + i3 = 5 - 1; + + while (i1 > 0) { + d[i1] -= d[i2] ^ (d[i3] << 25 | d[i3] >> 7); + i1--; i2--; i3--; + + if (i3 < 0) + i3 += n; + + d[i1] -= d[i2] ^ d[i3]; + i1--; i2--; i3--; + + if (i2 < 0) + i2 += n; + + d[i1] -= d[i2] ^ (d[i3] << 10 | d[i3] >> 22); + i1--; i2--; i3--; + + d[i1] -= d[i2] ^ d[i3]; + i1--; i2--; i3--; + } + } +} + +static int crypt_iv_elephant(struct crypt_config *cc, struct dm_crypt_request *dmreq) +{ + struct iv_elephant_private *elephant = &cc->iv_gen_private.elephant; + u8 *es, *ks, *data, *data2, *data_offset; + struct skcipher_request *req; + struct scatterlist *sg, *sg2, src, dst; + struct crypto_wait wait; + int i, r; + + req = skcipher_request_alloc(elephant->tfm, GFP_NOIO); + es = kzalloc(16, GFP_NOIO); /* Key for AES */ + ks = kzalloc(32, GFP_NOIO); /* Elephant sector key */ + + if (!req || !es || !ks) { + r = -ENOMEM; + goto out; + } + + *(__le64 *)es = cpu_to_le64(dmreq->iv_sector * cc->sector_size); + + /* E(Ks, e(s)) */ + sg_init_one(&src, es, 16); + sg_init_one(&dst, ks, 16); + skcipher_request_set_crypt(req, &src, &dst, 16, NULL); + skcipher_request_set_callback(req, 0, crypto_req_done, &wait); + r = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); + if (r) + goto out; + + /* E(Ks, e'(s)) */ + es[15] = 0x80; + sg_init_one(&dst, &ks[16], 16); + r = crypto_wait_req(crypto_skcipher_encrypt(req), &wait); + if (r) + goto out; + + sg = crypt_get_sg_data(cc, dmreq->sg_out); + data = kmap_atomic(sg_page(sg)); + data_offset = data + sg->offset; + + /* Cannot modify original bio, copy to sg_out and apply Elephant to it */ + if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { + sg2 = crypt_get_sg_data(cc, dmreq->sg_in); + data2 = kmap_atomic(sg_page(sg2)); + memcpy(data_offset, data2 + sg2->offset, cc->sector_size); + kunmap_atomic(data2); + } + + if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) { + diffuser_disk_to_cpu((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_b_decrypt((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_a_decrypt((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_cpu_to_disk((__le32*)data_offset, cc->sector_size / sizeof(u32)); + } + + for (i = 0; i < (cc->sector_size / 32); i++) + crypto_xor(data_offset + i * 32, ks, 32); + + if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { + diffuser_disk_to_cpu((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_a_encrypt((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_b_encrypt((u32*)data_offset, cc->sector_size / sizeof(u32)); + diffuser_cpu_to_disk((__le32*)data_offset, cc->sector_size / sizeof(u32)); + } + + kunmap_atomic(data); +out: + kzfree(ks); + kzfree(es); + skcipher_request_free(req); + return r; +} + +static int crypt_iv_elephant_gen(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + int r; + + if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) { + r = crypt_iv_elephant(cc, dmreq); + if (r) + return r; + } + + return crypt_iv_eboiv_gen(cc, iv, dmreq); +} + +static int crypt_iv_elephant_post(struct crypt_config *cc, u8 *iv, + struct dm_crypt_request *dmreq) +{ + if (bio_data_dir(dmreq->ctx->bio_in) != WRITE) + return crypt_iv_elephant(cc, dmreq); + + return 0; +} + +static int crypt_iv_elephant_init(struct crypt_config *cc) +{ + struct iv_elephant_private *elephant = &cc->iv_gen_private.elephant; + int key_offset = cc->key_size - cc->key_extra_size; + + return crypto_skcipher_setkey(elephant->tfm, &cc->key[key_offset], cc->key_extra_size); +} + +static int crypt_iv_elephant_wipe(struct crypt_config *cc) +{ + struct iv_elephant_private *elephant = &cc->iv_gen_private.elephant; + u8 key[ELEPHANT_MAX_KEY_SIZE]; + + memset(key, 0, cc->key_extra_size); + return crypto_skcipher_setkey(elephant->tfm, key, cc->key_extra_size); +} + static const struct crypt_iv_operations crypt_iv_plain_ops = { .generator = crypt_iv_plain_gen }; @@ -787,6 +1089,15 @@ static struct crypt_iv_operations crypt_iv_eboiv_ops = { .generator = crypt_iv_eboiv_gen }; +static struct crypt_iv_operations crypt_iv_elephant_ops = { + .ctr = crypt_iv_elephant_ctr, + .dtr = crypt_iv_elephant_dtr, + .init = crypt_iv_elephant_init, + .wipe = crypt_iv_elephant_wipe, + .generator = crypt_iv_elephant_gen, + .post = crypt_iv_elephant_post +}; + /* * Integrity extensions */ @@ -1103,6 +1414,9 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc, r = cc->iv_gen_ops->generator(cc, org_iv, dmreq); if (r < 0) return r; + /* Data can be already preprocessed in generator */ + if (test_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags)) + sg_in = sg_out; /* Store generated IV in integrity metadata */ if (cc->integrity_iv_size) memcpy(tag_iv, org_iv, cc->integrity_iv_size); @@ -2191,7 +2505,14 @@ static int crypt_ctr_ivmode(struct dm_target *ti, const char *ivmode) cc->iv_gen_ops = &crypt_iv_null_ops; else if (strcmp(ivmode, "eboiv") == 0) cc->iv_gen_ops = &crypt_iv_eboiv_ops; - else if (strcmp(ivmode, "lmk") == 0) { + else if (strcmp(ivmode, "elephant") == 0) { + cc->iv_gen_ops = &crypt_iv_elephant_ops; + cc->key_parts = 2; + cc->key_extra_size = cc->key_size / 2; + if (cc->key_extra_size > ELEPHANT_MAX_KEY_SIZE) + return -EINVAL; + set_bit(CRYPT_ENCRYPT_PREPROCESS, &cc->cipher_flags); + } else if (strcmp(ivmode, "lmk") == 0) { cc->iv_gen_ops = &crypt_iv_lmk_ops; /* * Version 2 and 3 is recognised according @@ -2959,7 +3280,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 19, 0}, + .version = {1, 20, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c index eb37584427a4..ff03b90072c5 100644 --- a/drivers/md/dm-dust.c +++ b/drivers/md/dm-dust.c @@ -207,16 +207,16 @@ static int dust_map_write(struct dust_device *dd, sector_t thisblock, bool fail_read_on_bb) { unsigned long flags; - int ret = DM_MAPIO_REMAPPED; + int r = DM_MAPIO_REMAPPED; if (fail_read_on_bb) { thisblock >>= dd->sect_per_block_shift; spin_lock_irqsave(&dd->dust_lock, flags); - ret = __dust_map_write(dd, thisblock); + r = __dust_map_write(dd, thisblock); spin_unlock_irqrestore(&dd->dust_lock, flags); } - return ret; + return r; } static int dust_map(struct dm_target *ti, struct bio *bio) diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e0c32793c248..2bc18c9c3abc 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -20,6 +20,7 @@ #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/time.h> +#include <linux/timer.h> #include <linux/workqueue.h> #include <linux/delay.h> #include <scsi/scsi_dh.h> @@ -29,6 +30,9 @@ #define DM_MSG_PREFIX "multipath" #define DM_PG_INIT_DELAY_MSECS 2000 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) +#define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 + +static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; /* Path properties */ struct pgpath { @@ -91,6 +95,8 @@ struct multipath { struct work_struct process_queued_bios; struct bio_list queued_bios; + + struct timer_list nopath_timer; /* Timeout for queue_if_no_path */ }; /* @@ -108,6 +114,7 @@ static void trigger_event(struct work_struct *work); static void activate_or_offline_path(struct pgpath *pgpath); static void activate_path_work(struct work_struct *work); static void process_queued_bios(struct work_struct *work); +static void queue_if_no_path_timeout_work(struct timer_list *t); /*----------------------------------------------- * Multipath state flags. @@ -195,6 +202,8 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->ti = ti; ti->private = m; + + timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0); } return m; @@ -718,6 +727,43 @@ static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, } /* + * If the queue_if_no_path timeout fires, turn off queue_if_no_path and + * process any queued I/O. + */ +static void queue_if_no_path_timeout_work(struct timer_list *t) +{ + struct multipath *m = from_timer(m, t, nopath_timer); + struct mapped_device *md = dm_table_get_md(m->ti->table); + + DMWARN("queue_if_no_path timeout on %s, failing queued IO", dm_device_name(md)); + queue_if_no_path(m, false, false); +} + +/* + * Enable the queue_if_no_path timeout if necessary. + * Called with m->lock held. + */ +static void enable_nopath_timeout(struct multipath *m) +{ + unsigned long queue_if_no_path_timeout = + READ_ONCE(queue_if_no_path_timeout_secs) * HZ; + + lockdep_assert_held(&m->lock); + + if (queue_if_no_path_timeout > 0 && + atomic_read(&m->nr_valid_paths) == 0 && + test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { + mod_timer(&m->nopath_timer, + jiffies + queue_if_no_path_timeout); + } +} + +static void disable_nopath_timeout(struct multipath *m) +{ + del_timer_sync(&m->nopath_timer); +} + +/* * An event is triggered whenever a path is taken out of use. * Includes path failure and PG bypass. */ @@ -1090,6 +1136,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) struct dm_arg_set as; unsigned pg_count = 0; unsigned next_pg_num; + unsigned long flags; as.argc = argc; as.argv = argv; @@ -1154,6 +1201,10 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } + spin_lock_irqsave(&m->lock, flags); + enable_nopath_timeout(m); + spin_unlock_irqrestore(&m->lock, flags); + ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; @@ -1208,6 +1259,7 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = ti->private; + disable_nopath_timeout(m); flush_multipath_work(m); free_multipath(m); } @@ -1241,6 +1293,8 @@ static int fail_path(struct pgpath *pgpath) schedule_work(&m->trigger_event); + enable_nopath_timeout(m); + out: spin_unlock_irqrestore(&m->lock, flags); @@ -1291,6 +1345,9 @@ out: process_queued_io_list(m); } + if (pgpath->is_active) + disable_nopath_timeout(m); + return r; } @@ -1444,7 +1501,7 @@ static void pg_init_done(void *data, int errors) break; case SCSI_DH_RETRY: /* Wait before retrying. */ - delay_retry = 1; + delay_retry = true; /* fall through */ case SCSI_DH_IMM_RETRY: case SCSI_DH_RES_TEMP_UNAVAIL: @@ -1789,6 +1846,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, struct dm_dev *dev; struct multipath *m = ti->private; action_fn action; + unsigned long flags; mutex_lock(&m->work_mutex); @@ -1800,9 +1858,13 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, if (argc == 1) { if (!strcasecmp(argv[0], "queue_if_no_path")) { r = queue_if_no_path(m, true, false); + spin_lock_irqsave(&m->lock, flags); + enable_nopath_timeout(m); + spin_unlock_irqrestore(&m->lock, flags); goto out; } else if (!strcasecmp(argv[0], "fail_if_no_path")) { r = queue_if_no_path(m, false, false); + disable_nopath_timeout(m); goto out; } } @@ -2065,6 +2127,10 @@ static void __exit dm_multipath_exit(void) module_init(dm_multipath_init); module_exit(dm_multipath_exit); +module_param_named(queue_if_no_path_timeout_secs, + queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds"); + MODULE_DESCRIPTION(DM_NAME " multipath target"); MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index c412eaa975fc..9a18bef0a5ff 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -129,7 +129,9 @@ struct raid_dev { CTR_FLAG_RAID10_COPIES | \ CTR_FLAG_RAID10_FORMAT | \ CTR_FLAG_DELTA_DISKS | \ - CTR_FLAG_DATA_OFFSET) + CTR_FLAG_DATA_OFFSET | \ + CTR_FLAG_JOURNAL_DEV | \ + CTR_FLAG_JOURNAL_MODE) /* Valid options definitions per raid level... */ @@ -3001,7 +3003,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) { 1, 254, "Cannot understand number of raid devices parameters" } }; - /* Must have <raid_type> */ arg = dm_shift_arg(&as); if (!arg) { ti->error = "No arguments"; @@ -3508,8 +3509,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, unsigned long recovery; unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ unsigned int sz = 0; - unsigned int rebuild_disks; - unsigned int write_mostly_params = 0; + unsigned int rebuild_writemostly_count = 0; sector_t progress, resync_max_sectors, resync_mismatches; enum sync_state state; struct raid_type *rt; @@ -3593,18 +3593,20 @@ static void raid_status(struct dm_target *ti, status_type_t type, case STATUSTYPE_TABLE: /* Report the table line string you would use to construct this raid set */ - /* Calculate raid parameter count */ - for (i = 0; i < rs->raid_disks; i++) - if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) - write_mostly_params += 2; - rebuild_disks = memweight(rs->rebuild_disks, DISKS_ARRAY_ELEMS * sizeof(*rs->rebuild_disks)); - raid_param_cnt += rebuild_disks * 2 + - write_mostly_params + + /* + * Count any rebuild or writemostly argument pairs and subtract the + * hweight count being added below of any rebuild and writemostly ctr flags. + */ + for (i = 0; i < rs->raid_disks; i++) { + rebuild_writemostly_count += (test_bit(i, (void *) rs->rebuild_disks) ? 2 : 0) + + (test_bit(WriteMostly, &rs->dev[i].rdev.flags) ? 2 : 0); + } + rebuild_writemostly_count -= (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags) ? 2 : 0) + + (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags) ? 2 : 0); + /* Calculate raid parameter count based on ^ rebuild/writemostly argument counts and ctr flags set. */ + raid_param_cnt += rebuild_writemostly_count + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) + - hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 + - (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0) + - (test_bit(__CTR_FLAG_JOURNAL_MODE, &rs->ctr_flags) ? 2 : 0); - + hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2; /* Emit table line */ /* This has to be in the documented order for userspace! */ DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors); @@ -3612,11 +3614,10 @@ static void raid_status(struct dm_target *ti, status_type_t type, DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_SYNC)); if (test_bit(__CTR_FLAG_NOSYNC, &rs->ctr_flags)) DMEMIT(" %s", dm_raid_arg_name_by_flag(CTR_FLAG_NOSYNC)); - if (rebuild_disks) + if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) for (i = 0; i < rs->raid_disks; i++) - if (test_bit(rs->dev[i].rdev.raid_disk, (void *) rs->rebuild_disks)) - DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), - rs->dev[i].rdev.raid_disk); + if (test_bit(i, (void *) rs->rebuild_disks)) + DMEMIT(" %s %u", dm_raid_arg_name_by_flag(CTR_FLAG_REBUILD), i); if (test_bit(__CTR_FLAG_DAEMON_SLEEP, &rs->ctr_flags)) DMEMIT(" %s %lu", dm_raid_arg_name_by_flag(CTR_FLAG_DAEMON_SLEEP), mddev->bitmap_info.daemon_sleep); @@ -3626,7 +3627,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, if (test_bit(__CTR_FLAG_MAX_RECOVERY_RATE, &rs->ctr_flags)) DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MAX_RECOVERY_RATE), mddev->sync_speed_max); - if (write_mostly_params) + if (test_bit(__CTR_FLAG_WRITE_MOSTLY, &rs->ctr_flags)) for (i = 0; i < rs->raid_disks; i++) if (test_bit(WriteMostly, &rs->dev[i].rdev.flags)) DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_WRITE_MOSTLY), @@ -4029,7 +4030,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 15, 0}, + .version = {1, 15, 1}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4fb1a40e68a0..6b11a266299f 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1061,7 +1061,7 @@ static void snapshot_merge_next_chunks(struct dm_snapshot *s) DMERR("Read error in exception store: " "shutting down merge"); down_write(&s->lock); - s->merge_failed = 1; + s->merge_failed = true; up_write(&s->lock); } goto shut; @@ -1149,7 +1149,7 @@ static void merge_callback(int read_err, unsigned long write_err, void *context) shut: down_write(&s->lock); - s->merge_failed = 1; + s->merge_failed = true; b = __release_queued_bios_after_merge(s); up_write(&s->lock); error_bios(b); @@ -1314,7 +1314,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_LIST_HEAD(&s->list); spin_lock_init(&s->pe_lock); s->state_bits = 0; - s->merge_failed = 0; + s->merge_failed = false; s->first_merging_chunk = 0; s->num_merging_chunks = 0; bio_list_init(&s->bios_queued_during_merge); diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b88d6d701f5b..fc9947d6210c 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -28,7 +28,7 @@ * * - A hierarchical btree, with 2 levels which effectively maps (thin * dev id, virtual block) -> block_time. Block time is a 64-bit - * field holding the time in the low 24 bits, and block in the top 48 + * field holding the time in the low 24 bits, and block in the top 40 * bits. * * BTrees consist solely of btree_nodes, that fill a block. Some are @@ -387,16 +387,15 @@ static int subtree_equal(void *context, const void *value1_le, const void *value * Variant that is used for in-core only changes or code that * shouldn't put the pool in service on its own (e.g. commit). */ -static inline void __pmd_write_lock(struct dm_pool_metadata *pmd) +static inline void pmd_write_lock_in_core(struct dm_pool_metadata *pmd) __acquires(pmd->root_lock) { down_write(&pmd->root_lock); } -#define pmd_write_lock_in_core(pmd) __pmd_write_lock((pmd)) static inline void pmd_write_lock(struct dm_pool_metadata *pmd) { - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (unlikely(!pmd->in_service)) pmd->in_service = true; } @@ -811,7 +810,7 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) return r; if (td->open_count) - td->changed = 0; + td->changed = false; else { list_del(&td->list); kfree(td); @@ -831,6 +830,7 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) * We need to know if the thin_disk_superblock exceeds a 512-byte sector. */ BUILD_BUG_ON(sizeof(struct thin_disk_superblock) > 512); + BUG_ON(!rwsem_is_locked(&pmd->root_lock)); if (unlikely(!pmd->in_service)) return 0; @@ -953,6 +953,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } + pmd_write_lock_in_core(pmd); if (!dm_bm_is_read_only(pmd->bm) && !pmd->fail_io) { r = __commit_transaction(pmd); if (r < 0) @@ -961,6 +962,7 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) } if (!pmd->fail_io) __destroy_persistent_data_objects(pmd); + pmd_write_unlock(pmd); kfree(pmd); return 0; @@ -1106,7 +1108,7 @@ static int __set_snapshot_details(struct dm_pool_metadata *pmd, if (r) return r; - td->changed = 1; + td->changed = true; td->snapshotted_time = time; snap->mapped_blocks = td->mapped_blocks; @@ -1618,7 +1620,7 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; - td->changed = 1; + td->changed = true; if (inserted) td->mapped_blocks++; @@ -1649,7 +1651,7 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) return r; td->mapped_blocks--; - td->changed = 1; + td->changed = true; return 0; } @@ -1703,7 +1705,7 @@ static int __remove_range(struct dm_thin_device *td, dm_block_t begin, dm_block_ } td->mapped_blocks -= total_count; - td->changed = 1; + td->changed = true; /* * Reinsert the mapping tree. @@ -1841,7 +1843,7 @@ int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) * Care is taken to not have commit be what * triggers putting the thin-pool in-service. */ - __pmd_write_lock(pmd); + pmd_write_lock_in_core(pmd); if (pmd->fail_io) goto out; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 57626c27a54b..fa8d5464c1fb 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -231,6 +231,7 @@ struct pool { struct dm_target *ti; /* Only set if a pool target is bound */ struct mapped_device *pool_md; + struct block_device *data_dev; struct block_device *md_dev; struct dm_pool_metadata *pmd; @@ -281,6 +282,8 @@ struct pool { struct dm_bio_prison_cell **cell_sort_array; mempool_t mapping_pool; + + struct bio flush_bio; }; static void metadata_operation_failed(struct pool *pool, const char *op, int r); @@ -328,7 +331,6 @@ struct pool_c { dm_block_t low_water_blocks; struct pool_features requested_pf; /* |