summary | refs | log | tree | commit | diff | stats
path: root/drivers/md
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 09:12:01 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-12-15 09:12:01 -0800
commit: 53365383c4667aba55385cd1858582c19a7a8a36 (patch)
tree: b290d003534b3947834762c2fb492d9d0beb985f /drivers/md
parent: 51b736b85155a56543fda8aeca5f8592795d7983 (diff)
parent: d2fdb776e08d4231d7e86a879cc663a93913c202 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (80 commits)
  dm snapshot: use merge origin if snapshot invalid
  dm snapshot: report merge failure in status
  dm snapshot: merge consecutive chunks together
  dm snapshot: trigger exceptions in remaining snapshots during merge
  dm snapshot: delay merging a chunk until writes to it complete
  dm snapshot: queue writes to chunks being merged
  dm snapshot: add merging
  dm snapshot: permit only one merge at once
  dm snapshot: support barriers in snapshot merge target
  dm snapshot: avoid allocating exceptions in merge
  dm snapshot: rework writing to origin
  dm snapshot: add merge target
  dm exception store: add merge specific methods
  dm snapshot: create function for chunk_is_tracked wait
  dm snapshot: make bio optional in __origin_write
  dm mpath: reject messages when device is suspended
  dm: export suspended state to targets
  dm: rename dm_suspended to dm_suspended_md
  dm: swap target postsuspend call and setting suspended flag
  dm crypt: add plain64 iv
  ...
Diffstat (limited to 'drivers/md')
-rw-r--r-- drivers/md/dm-crypt.c | 207
-rw-r--r-- drivers/md/dm-exception-store.c | 33
-rw-r--r-- drivers/md/dm-exception-store.h | 62
-rw-r--r-- drivers/md/dm-io.c | 120
-rw-r--r-- drivers/md/dm-ioctl.c | 123
-rw-r--r-- drivers/md/dm-kcopyd.c | 5
-rw-r--r-- drivers/md/dm-log.c | 77
-rw-r--r-- drivers/md/dm-mpath.c | 95
-rw-r--r-- drivers/md/dm-raid1.c | 219
-rw-r--r-- drivers/md/dm-region-hash.c | 31
-rw-r--r-- drivers/md/dm-snap-persistent.c | 195
-rw-r--r-- drivers/md/dm-snap-transient.c | 24
-rw-r--r-- drivers/md/dm-snap.c | 1279
-rw-r--r-- drivers/md/dm-sysfs.c | 10
-rw-r--r-- drivers/md/dm-table.c | 3
-rw-r--r-- drivers/md/dm-uevent.c | 9
-rw-r--r-- drivers/md/dm.c | 643
-rw-r--r-- drivers/md/dm.h | 13
18 files changed, 2274 insertions, 874 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index e412980763bd..a93637223c8d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2003 Christophe Saout <christophe@saout.de>
* Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org>
- * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved.
*
* This file is released under the GPL.
*/
@@ -71,10 +71,21 @@ struct crypt_iv_operations {
int (*ctr)(struct crypt_config *cc, struct dm_target *ti,
const char *opts);
void (*dtr)(struct crypt_config *cc);
- const char *(*status)(struct crypt_config *cc);
+ int (*init)(struct crypt_config *cc);
+ int (*wipe)(struct crypt_config *cc);
int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
};
+struct iv_essiv_private {
+ struct crypto_cipher *tfm;
+ struct crypto_hash *hash_tfm;
+ u8 *salt;
+};
+
+struct iv_benbi_private {
+ int shift;
+};
+
/*
* Crypt: maps a linear range of a block device
* and encrypts / decrypts at the same time.
@@ -102,8 +113,8 @@ struct crypt_config {
struct crypt_iv_operations *iv_gen_ops;
char *iv_mode;
union {
- struct crypto_cipher *essiv_tfm;
- int benbi_shift;
+ struct iv_essiv_private essiv;
+ struct iv_benbi_private benbi;
} iv_gen_private;
sector_t iv_offset;
unsigned int iv_size;
@@ -147,6 +158,9 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io);
* plain: the initial vector is the 32-bit little-endian version of the sector
* number, padded with zeros if necessary.
*
+ * plain64: the initial vector is the 64-bit little-endian version of the sector
+ * number, padded with zeros if necessary.
+ *
* essiv: "encrypted sector|salt initial vector", the sector number is
* encrypted with the bulk cipher using a salt as key. The salt
* should be derived from the bulk cipher's key via hashing.
@@ -169,88 +183,123 @@ static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
return 0;
}
-static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
- const char *opts)
+static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
+ sector_t sector)
{
- struct crypto_cipher *essiv_tfm;
- struct crypto_hash *hash_tfm;
+ memset(iv, 0, cc->iv_size);
+ *(u64 *)iv = cpu_to_le64(sector);
+
+ return 0;
+}
+
+/* Initialise ESSIV - compute salt but no local memory allocations */
+static int crypt_iv_essiv_init(struct crypt_config *cc)
+{
+ struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
struct hash_desc desc;
struct scatterlist sg;
- unsigned int saltsize;
- u8 *salt;
int err;
- if (opts == NULL) {
+ sg_init_one(&sg, cc->key, cc->key_size);
+ desc.tfm = essiv->hash_tfm;
+ desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ err = crypto_hash_digest(&desc, &sg, cc->key_size, essiv->salt);
+ if (err)
+ return err;
+
+ return crypto_cipher_setkey(essiv->tfm, essiv->salt,
+ crypto_hash_digestsize(essiv->hash_tfm));
+}
+
+/* Wipe salt and reset key derived from volume key */
+static int crypt_iv_essiv_wipe(struct crypt_config *cc)
+{
+ struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
+ unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm);
+
+ memset(essiv->salt, 0, salt_size);
+
+ return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size);
+}
+
+static void crypt_iv_essiv_dtr(struct crypt_config *cc)
+{
+ struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
+
+ crypto_free_cipher(essiv->tfm);
+ essiv->tfm = NULL;
+
+ crypto_free_hash(essiv->hash_tfm);
+ essiv->hash_tfm = NULL;
+
+ kzfree(essiv->salt);
+ essiv->salt = NULL;
+}
+
+static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
+ const char *opts)
+{
+ struct crypto_cipher *essiv_tfm = NULL;
+ struct crypto_hash *hash_tfm = NULL;
+ u8 *salt = NULL;
+ int err;
+
+ if (!opts) {
ti->error = "Digest algorithm missing for ESSIV mode";
return -EINVAL;
}
- /* Hash the cipher key with the given hash algorithm */
+ /* Allocate hash algorithm */
hash_tfm = crypto_alloc_hash(opts, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(hash_tfm)) {
ti->error = "Error initializing ESSIV hash";
- return PTR_ERR(hash_tfm);
+ err = PTR_ERR(hash_tfm);
+ goto bad;
}
- saltsize = crypto_hash_digestsize(hash_tfm);
- salt = kmalloc(saltsize, GFP_KERNEL);
- if (salt == NULL) {
+ salt = kzalloc(crypto_hash_digestsize(hash_tfm), GFP_KERNEL);
+ if (!salt) {
ti->error = "Error kmallocing salt storage in ESSIV";
- crypto_free_hash(hash_tfm);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto bad;
}
- sg_init_one(&sg, cc->key, cc->key_size);
- desc.tfm = hash_tfm;
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- err = crypto_hash_digest(&desc, &sg, cc->key_size, salt);
- crypto_free_hash(hash_tfm);
-
- if (err) {
- ti->error = "Error calculating hash in ESSIV";
- kfree(salt);
- return err;
- }
-
- /* Setup the essiv_tfm with the given salt */
+ /* Allocate essiv_tfm */
essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(essiv_tfm)) {
ti->error = "Error allocating crypto tfm for ESSIV";
- kfree(salt);
- return PTR_ERR(essiv_tfm);
+ err = PTR_ERR(essiv_tfm);
+ goto bad;
}
if (crypto_cipher_blocksize(essiv_tfm) !=
crypto_ablkcipher_ivsize(cc->tfm)) {
ti->error = "Block size of ESSIV cipher does "
"not match IV size of block cipher";
- crypto_free_cipher(essiv_tfm);
- kfree(salt);
- return -EINVAL;
+ err = -EINVAL;
+ goto bad;
}
- err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
- if (err) {
- ti->error = "Failed to set key for ESSIV cipher";
- crypto_free_cipher(essiv_tfm);
- kfree(salt);
- return err;
- }
- kfree(salt);
- cc->iv_gen_private.essiv_tfm = essiv_tfm;
+ cc->iv_gen_private.essiv.salt = salt;
+ cc->iv_gen_private.essiv.tfm = essiv_tfm;
+ cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
+
return 0;
-}
-static void crypt_iv_essiv_dtr(struct crypt_config *cc)
-{
- crypto_free_cipher(cc->iv_gen_private.essiv_tfm);
- cc->iv_gen_private.essiv_tfm = NULL;
+bad:
+ if (essiv_tfm && !IS_ERR(essiv_tfm))
+ crypto_free_cipher(essiv_tfm);
+ if (hash_tfm && !IS_ERR(hash_tfm))
+ crypto_free_hash(hash_tfm);
+ kfree(salt);
+ return err;
}
static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
{
memset(iv, 0, cc->iv_size);
*(u64 *)iv = cpu_to_le64(sector);
- crypto_cipher_encrypt_one(cc->iv_gen_private.essiv_tfm, iv, iv);
+ crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv);
return 0;
}
@@ -273,7 +322,7 @@ static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti,
return -EINVAL;
}
- cc->iv_gen_private.benbi_shift = 9 - log;
+ cc->iv_gen_private.benbi.shift = 9 - log;
return 0;
}
@@ -288,7 +337,7 @@ static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */
- val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi_shift) + 1);
+ val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi.shift) + 1);
put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64)));
return 0;
@@ -305,9 +354,15 @@ static struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
+static struct crypt_iv_operations crypt_iv_plain64_ops = {
+ .generator = crypt_iv_plain64_gen
+};
+
static struct crypt_iv_operations crypt_iv_essiv_ops = {
.ctr = crypt_iv_essiv_ctr,
.dtr = crypt_iv_essiv_dtr,
+ .init = crypt_iv_essiv_init,
+ .wipe = crypt_iv_essiv_wipe,
.generator = crypt_iv_essiv_gen
};
@@ -934,14 +989,14 @@ static int crypt_set_key(struct crypt_config *cc, char *key)
set_bit(DM_CRYPT_KEY_VALID, &cc->flags);
- return 0;
+ return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
}
static int crypt_wipe_key(struct crypt_config *cc)
{
clear_bit(DM_CRYPT_KEY_VALID, &cc->flags);
memset(&cc->key, 0, cc->key_size * sizeof(u8));
- return 0;
+ return crypto_ablkcipher_setkey(cc->tfm, cc->key, cc->key_size);
}
/*
@@ -983,11 +1038,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -ENOMEM;
}
- if (crypt_set_key(cc, argv[1])) {
- ti->error = "Error decoding key";
- goto bad_cipher;
- }
-
/* Compatibility mode for old dm-crypt cipher strings */
if (!chainmode || (strcmp(chainmode, "plain") == 0 && !ivmode)) {
chainmode = "cbc";
@@ -1015,6 +1065,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
strcpy(cc->chainmode, chainmode);
cc->tfm = tfm;
+ if (crypt_set_key(cc, argv[1]) < 0) {
+ ti->error = "Error decoding and setting key";
+ goto bad_ivmode;
+ }
+
/*
* Choose ivmode. Valid modes: "plain", "essiv:<esshash>", "benbi".
* See comments at iv code
@@ -1024,6 +1079,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->iv_gen_ops = NULL;
else if (strcmp(ivmode, "plain") == 0)
cc->iv_gen_ops = &crypt_iv_plain_ops;
+ else if (strcmp(ivmode, "plain64") == 0)
+ cc->iv_gen_ops = &crypt_iv_plain64_ops;
else if (strcmp(ivmode, "essiv") == 0)
cc->iv_gen_ops = &crypt_iv_essiv_ops;
else if (strcmp(ivmode, "benbi") == 0)
@@ -1039,6 +1096,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->iv_gen_ops->ctr(cc, ti, ivopts) < 0)
goto bad_ivmode;
+ if (cc->iv_gen_ops && cc->iv_gen_ops->init &&
+ cc->iv_gen_ops->init(cc) < 0) {
+ ti->error = "Error initialising IV";
+ goto bad_slab_pool;
+ }
+
cc->iv_size = crypto_ablkcipher_ivsize(tfm);
if (cc->iv_size)
/* at least a 64 bit sector number should fit in our buffer */
@@ -1085,11 +1148,6 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad_bs;
}
- if (crypto_ablkcipher_setkey(tfm, cc->key, key_size) < 0) {
- ti->error = "Error setting key";
- goto bad_device;
- }
-
if (sscanf(argv[2], "%llu", &tmpll) != 1) {
ti->error = "Invalid iv_offset sector";
goto bad_device;
@@ -1278,6 +1336,7 @@ static void crypt_resume(struct dm_target *ti)
static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
{
struct crypt_config *cc = ti->private;
+ int ret = -EINVAL;
if (argc < 2)
goto error;
@@ -1287,10 +1346,22 @@ static int crypt_message(struct dm_target *ti, unsigned argc, char **argv)
DMWARN("not suspended during key manipulation.");
return -EINVAL;
}
- if (argc == 3 && !strnicmp(argv[1], MESG_STR("set")))
- return crypt_set_key(cc, argv[2]);
- if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe")))
+ if (argc == 3 && !strnicmp(argv[1], MESG_STR("set"))) {
+ ret = crypt_set_key(cc, argv[2]);
+ if (ret)
+ return ret;
+ if (cc->iv_gen_ops && cc->iv_gen_ops->init)
+ ret = cc->iv_gen_ops->init(cc);
+ return ret;
+ }
+ if (argc == 2 && !strnicmp(argv[1], MESG_STR("wipe"))) {
+ if (cc->iv_gen_ops && cc->iv_gen_ops->wipe) {
+ ret = cc->iv_gen_ops->wipe(cc);
+ if (ret)
+ return ret;
+ }
return crypt_wipe_key(cc);
+ }
}
error:
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 7dbe652efb5a..2b7907b6dd09 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -172,7 +172,8 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
}
/* Validate the chunk size against the device block size */
- if (chunk_size % (bdev_logical_block_size(store->cow->bdev) >> 9)) {
+ if (chunk_size %
+ (bdev_logical_block_size(dm_snap_cow(store->snap)->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
@@ -190,6 +191,7 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
}
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+ struct dm_snapshot *snap,
unsigned *args_used,
struct dm_exception_store **store)
{
@@ -198,7 +200,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
struct dm_exception_store *tmp_store;
char persistent;
- if (argc < 3) {
+ if (argc < 2) {
ti->error = "Insufficient exception store arguments";
return -EINVAL;
}
@@ -209,14 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
return -ENOMEM;
}
- persistent = toupper(*argv[1]);
+ persistent = toupper(*argv[0]);
if (persistent == 'P')
type = get_type("P");
else if (persistent == 'N')
type = get_type("N");
else {
ti->error = "Persistent flag is not P or N";
- return -EINVAL;
+ r = -EINVAL;
+ goto bad_type;
}
if (!type) {
@@ -226,32 +229,23 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
}
tmp_store->type = type;
- tmp_store->ti = ti;
-
- r = dm_get_device(ti, argv[0], 0, 0,
- FMODE_READ | FMODE_WRITE, &tmp_store->cow);
- if (r) {
- ti->error = "Cannot get COW device";
- goto bad_cow;
- }
+ tmp_store->snap = snap;
- r = set_chunk_size(tmp_store, argv[2], &ti->error);
+ r = set_chunk_size(tmp_store, argv[1], &ti->error);
if (r)
- goto bad_ctr;
+ goto bad;
r = type->ctr(tmp_store, 0, NULL);
if (r) {
ti->error = "Exception store type constructor failed";
- goto bad_ctr;
+ goto bad;
}
- *args_used = 3;
+ *args_used = 2;
*store = tmp_store;
return 0;
-bad_ctr:
- dm_put_device(ti, tmp_store->cow);
-bad_cow:
+bad:
put_type(type);
bad_type:
kfree(tmp_store);
@@ -262,7 +256,6 @@ EXPORT_SYMBOL(dm_exception_store_create);
void dm_exception_store_destroy(struct dm_exception_store *store)
{
store->type->dtr(store);
- dm_put_device(store->ti, store->cow);
put_type(store->type);
kfree(store);
}
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 8a223a48802c..e8dfa06af3ba 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -26,7 +26,7 @@ typedef sector_t chunk_t;
* of chunks that follow contiguously. Remaining bits hold the number of the
* chunk within the device.
*/
-struct dm_snap_exception {
+struct dm_exception {
struct list_head hash_list;
chunk_t old_chunk;
@@ -64,17 +64,34 @@ struct dm_exception_store_type {
* Find somewhere to store the next exception.
*/
int (*prepare_exception) (struct dm_exception_store *store,
- struct dm_snap_exception *e);
+ struct dm_exception *e);
/*
* Update the metadata with this exception.
*/
void (*commit_exception) (struct dm_exception_store *store,
- struct dm_snap_exception *e,
+ struct dm_exception *e,
void (*callback) (void *, int success),
void *callback_context);
/*
+ * Returns 0 if the exception store is empty.
+ *
+ * If there are exceptions still to be merged, sets
+ * *last_old_chunk and *last_new_chunk to the most recent
+ * still-to-be-merged chunk and returns the number of
+ * consecutive previous ones.
+ */
+ int (*prepare_merge) (struct dm_exception_store *store,
+ chunk_t *last_old_chunk, chunk_t *last_new_chunk);
+
+ /*
+ * Clear the last n exceptions.
+ * nr_merged must be <= the value returned by prepare_merge.
+ */
+ int (*commit_merge) (struct dm_exception_store *store, int nr_merged);
+
+ /*
* The snapshot is invalid, note this in the metadata.
*/
void (*drop_snapshot) (struct dm_exception_store *store);
@@ -86,19 +103,19 @@ struct dm_exception_store_type {
/*
* Return how full the snapshot is.
*/
- void (*fraction_full) (struct dm_exception_store *store,
- sector_t *numerator,
- sector_t *denominator);
+ void (*usage) (struct dm_exception_store *store,
+ sector_t *total_sectors, sector_t *sectors_allocated,
+ sector_t *metadata_sectors);
/* For internal device-mapper use only. */
struct list_head list;
};
+struct dm_snapshot;
+
struct dm_exception_store {
struct dm_exception_store_type *type;
- struct dm_target *ti;
-
- struct dm_dev *cow;
+ struct dm_snapshot *snap;
/* Size of data blocks saved - must be a power of 2 */
unsigned chunk_size;
@@ -109,6 +126,11 @@ struct dm_exception_store {
};
/*
+ * Obtain the cow device used by a given snapshot.
+ */
+struct dm_dev *dm_snap_cow(struct dm_snapshot *snap);
+
+/*
* Funtions to manipulate consecutive chunks
*/
# if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
@@ -120,18 +142,25 @@ static inline chunk_t dm_chunk_number(chunk_t chunk)
return chunk & (chunk_t)((1ULL << DM_CHUNK_NUMBER_BITS) - 1ULL);
}
-static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+static inline unsigned dm_consecutive_chunk_count(struct dm_exception *e)
{
return e->new_chunk >> DM_CHUNK_NUMBER_BITS;
}
-static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
{
e->new_chunk += (1ULL << DM_CHUNK_NUMBER_BITS);
BUG_ON(!dm_consecutive_chunk_count(e));
}
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
+{
+ BUG_ON(!dm_consecutive_chunk_count(e));
+
+ e->new_chunk -= (1ULL << DM_CHUNK_NUMBER_BITS);
+}
+
# else
# define DM_CHUNK_CONSECUTIVE_BITS 0
@@ -140,12 +169,16 @@ static inline chunk_t dm_chunk_number(chunk_t chunk)
return chunk;
}
-static inline unsigned dm_consecutive_chunk_count(struct dm_snap_exception *e)
+static inline unsigned dm_consecutive_chunk_count(struct dm_exception *e)
{
return 0;
}
-static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
+static inline void dm_consecutive_chunk_count_inc(struct dm_exception *e)
+{
+}
+
+static inline void dm_consecutive_chunk_count_dec(struct dm_exception *e)
{
}
@@ -162,7 +195,7 @@ static inline sector_t get_dev_size(struct block_device *bdev)
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
sector_t sector)
{
- return (sector & ~store->chunk_mask) >> store->chunk_shift;
+ return sector >> store->chunk_shift;
}
int dm_exception_store_type_register(struct dm_exception_store_type *type);
@@ -173,6 +206,7 @@ int dm_exception_store_set_chunk_size(struct dm_exception_store *store,
char **error);
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
+ struct dm_snapshot *snap,
unsigned *args_used,
struct dm_exception_store **store);
void dm_exception_store_destroy(struct dm_exception_store *store);
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 3a2e6a2f8bdd..10f457ca6af2 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -5,6 +5,8 @@
* This file is released under the GPL.
*/
+#include "dm.h"
+
#include <linux/device-mapper.h>
#include <linux/bio.h>
@@ -14,12 +16,19 @@
#include <linux/slab.h>
#include <linux/dm-io.h>
+#define DM_MSG_PREFIX "io"
+
+#define DM_IO_MAX_REGIONS BITS_PER_LONG
+
struct dm_io_client {
mempool_t *pool;
struct bio_set *bios;
};
-/* FIXME: can we shrink this ? */
+/*
+ * Aligning 'struct io' reduces the number of bits required to store
+ * its address. Refer to store_io_and_region_in_bio() below.
+ */
struct io {
unsigned long error_bits;
unsigned long eopnotsupp_bits;
@@ -28,7 +37,9 @@ struct io {
struct dm_io_client *client;
io_notify_fn callback;
void *context;
-};
+} __attribute__((aligned(DM_IO_MAX_REGIONS)));
+
+static struct kmem_cache *_dm_io_cache;
/*
* io contexts are only dynamically allocated for asynchronous
@@ -53,7 +64,7 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages)
if (!client)
return ERR_PTR(-ENOMEM);
- client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
+ client->pool = mempool_create_slab_pool(ios, _dm_io_cache);
if (!client->pool)
goto bad;
@@ -88,18 +99,29 @@ EXPORT_SYMBOL(dm_io_client_destroy);
/*-----------------------------------------------------------------
* We need to keep track of which region a bio is doing io for.
- * In order to save a memory allocation we store this the last
- * bvec which we know is unused (blech).
- * XXX This is ugly and can OOPS with some configs... find another way.
+ * To avoid a memory allocation to store just 5 or 6 bits, we
+ * ensure the 'struct io' pointer is aligned so enough low bits are
+ * always zero and then combine it with the region number directly in
+ * bi_private.
*---------------------------------------------------------------*/
-static inline void bio_set_region(struct bio *bio, unsigned region)
+static void store_io_and_region_in_bio(struct bio *bio, struct io *io,
+ unsigned region)
{
- bio->bi_io_vec[bio->bi_max_vecs].bv_len = region;
+ if (unlikely(!IS_ALIGNED((unsigned long)io, DM_IO_MAX_REGIONS))) {
+ DMCRIT("Unaligned struct io pointer %p", io);
+ BUG();
+ }
+
+ bio->bi_private = (void *)((unsigned long)io | region);
}
-static inline unsigned bio_get_region(struct bio *bio)
+static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
+ unsigned *region)
{
- return bio->bi_io_vec[bio->bi_max_vecs].bv_len;
+ unsigned long val = (unsigned long)bio->bi_private;
+
+ *io = (void *)(val & -(unsigned long)DM_IO_MAX_REGIONS);
+ *region = val & (DM_IO_MAX_REGIONS - 1);
}
/*-----------------------------------------------------------------
@@ -140,10 +162,8 @@ static void endio(struct bio *bio, int error)
/*
* The bio destructor in bio_put() may use the io object.
*/
- io = bio->bi_private;
- region = bio_get_region(bio);
+ retrieve_io_and_region_from_bio(bio, &io, &region);
- bio->bi_max_vecs++;
bio_put(bio);
dec_count(io, region, error);
@@ -243,7 +263,10 @@ static void vm_dp_init(struct dpages *dp, void *data)
static void dm_bio_destructor(struct bio *bio)
{
- struct io *io = bio->bi_private;
+ unsigned region;
+ struct io *io;
+
+ retrieve_io_and_region_from_bio(bio, &io, &region);
bio_free(bio, io->client->bios);
}
@@ -286,26 +309,23 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
unsigned num_bvecs;
sector_t remaining = where->count;
- while (remaining) {
+ /*
+ * where->count may be zero if rw holds a write barrier and we
+ * need to send a zero-sized barrier.
+ */
+ do {
/*
- * Allocate a suitably sized-bio: we add an extra
- * bvec for bio_get/set_region() and decrement bi_max_vecs
- * to hide it from bio_add_page().
+ * Allocate a suitably sized-bio.
*/
num_bvecs = dm_sector_div_up(remaining,
(PAGE_SIZE >> SECTOR_SHIFT));
- num_bvecs = 1 + min_t(int, bio_get_nr_vecs(where->bdev),
- num_bvecs);
- if (unlikely(num_bvecs > BIO_MAX_PAGES))
- num_bvecs = BIO_MAX_PAGES;
+ num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), num_bvecs);
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
- bio->bi_private = io;
bio->bi_destructor = dm_bio_destructor;
- bio->bi_max_vecs--;
- bio_set_region(bio, region);
+ store_io_and_region_in_bio(bio, io, region);
/*
* Try and add as many pages as possible.
@@ -323,7 +343,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
atomic_inc(&io->count);
submit_bio(rw, bio);
- }
+ } while (remaining);
}
static void dispatch_io(int rw, unsigned int num_regions,
@@ -333,6 +353,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
int i;
struct dpages old_pages = *dp;
+ BUG_ON(num_regions > DM_IO_MAX_REGIONS);
+
if (sync)
rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
@@ -342,7 +364,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
*/
for (i = 0; i < num_regions; i++) {
*dp = old_pages;
- if (where[i].count)
+ if (where[i].count || (rw & (1 << BIO_RW_BARRIER)))
do_region(rw, i, where + i, dp, io);
}
@@ -357,7 +379,14 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
struct dm_io_region *where, int rw, struct dpages *dp,
unsigned long *error_bits)
{
- struct io io;
+ /*
+ * gcc <= 4.3 can't do the alignment for stack variables, so we must
+ * align it on our own.
+ * volatile prevents the optimizer from removing or reusing
+ * "io_" field from the stack frame (allowed in ANSI C).
+ */
+ volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
+ struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
WARN_ON(1);
@@ -365,33 +394,33 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
}
retry:
- io.error_bits = 0;
- io.eopnotsupp_bits = 0;
- atomic_set(&io.count, 1); /* see dispatch_io() */
- io.sleeper = current;
- io.client = client;
+ io->error_bits = 0;
+ io->eopnotsupp_bits = 0;
+ atomic_set(&io->count, 1); /* see dispatch_io() */
+ io->sleeper = current;
+ io->client = client;
- dispatch_io(rw, num_regions, where, dp, &io, 1);
+ dispatch_io(rw, num_regions, where, dp, io, 1);
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!atomic_read(&io.count))
+ if (!atomic_read(&io->count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
- if (io.eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
+ if (io->eopnotsupp_bits && (rw & (1 << BIO_RW_BARRIER))) {
rw &= ~(1 << BIO_RW_BARRIER);
goto retry;
}
if (error_bits)
- *error_bits = io.error_bits;
+ *error_bits = io->error_bits;
- return io.error_bits ? -EIO : 0;
+ return io->error_bits ? -EIO : 0;
}
static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -472,3 +501,18 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
&dp, io_req->notify.fn, io_req->notify.context);
}
EXPORT_SYMBOL(dm_io);
+
+int __init dm_io_init(void)
+{
+ _dm_io_cache = KMEM_CACHE(io, 0);
+ if (!_dm_io_cache)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void dm_io_exit(void)
+{
+ kmem_cache_destroy(_dm_io_cache);
+ _dm_io_cache = NULL;
+}
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index a67942931582..1d669322b27c 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -56,6 +56,11 @@ static void dm_hash_remove_all(int keep_open_devices);
*/
static DECLARE_RWSEM(_hash_lock);
+/*
+ * Protects use of mdptr to obtain hash cell name and uuid from mapped device.
+ */
+static DEFINE_MUTEX(dm_hash_cells_mutex);
+
static void init_buckets(struct list_head *buckets)
{
unsigned int i;
@@ -206,7 +211,9 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
}
dm_get(md);
+ mutex_lock(&dm_hash_cells_mutex);
dm_set_mdptr(md, cell);
+ mutex_unlock(&dm_hash_cells_mutex);
up_write(&_hash_lock);
return 0;
@@ -224,9 +231,11 @@ static void __hash_remove(struct hash_cell *hc)
/* remove from the dev hash */
list_del(&hc->uuid_list);
list_del(&hc->name_list);
+ mutex_lock(&dm_hash_cells_mutex);
dm_set_mdptr(hc->md, NULL);
+ mutex_unlock(&dm_hash_cells_mutex);
- table = dm_get_table(hc->md);
+ table = dm_get_live_table(hc->md);
if (table) {
dm_table_event(table);
dm_table_put(table);
@@ -321,13 +330,15 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new)
*/
list_del(&hc->name_list);
old_name = hc->name;
+ mutex_lock(&dm_hash_cells_mutex);
hc->name = new_name;
+ mutex_unlock(&dm_hash_cells_mutex);
list_add(&hc->name_list, _name_buckets + hash_str(new_name));
/*
* Wake up any dm event waiters.
*/
- table = dm_get_table(hc->md);
+ table = dm_get_live_table(hc->md);
if (table) {
dm_table_event(table);
dm_table_put(table);
@@ -512,8 +523,6 @@ static int list_versions(struct dm_ioctl *param, size_t param_size)
return 0;
}
-
-
static int check_name(const char *name)
{
if (strchr(name, '/')) {
@@ -525,6 +534,40 @@ static int check_name(const char *name)
}
/*
+ * On successful return, the caller must not attempt to acquire
+ * _hash_lock without first calling dm_table_put, because dm_table_destroy
+ * waits for this dm_table_put and could be called under this lock.
+ */
+static struct dm_table *dm_ge