summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 17:30:47 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2011-01-13 17:30:47 -0800
commitf6bcfd94c0a97c11ce9197ade93a08bc8af6e057 (patch)
tree83d867565b4f2a7627b3288f9e000eaf2b217be9
parent509e4aef44eb10e4aef1f81c3c3ff1214671503b (diff)
parent9d09e663d5502c46f2d9481c04c1087e1c2da698 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm
* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (32 commits) dm: raid456 basic support dm: per target unplug callback support dm: introduce target callbacks and congestion callback dm mpath: delay activate_path retry on SCSI_DH_RETRY dm: remove superfluous irq disablement in dm_request_fn dm log: use PTR_ERR value instead of ENOMEM dm snapshot: avoid storing private suspended state dm snapshot: persistent make metadata_wq multithreaded dm: use non reentrant workqueues if equivalent dm: convert workqueues to alloc_ordered dm stripe: switch from local workqueue to system_wq dm: dont use flush_scheduled_work dm snapshot: remove unused dm_snapshot queued_bios_work dm ioctl: suppress needless warning messages dm crypt: add loop aes iv generator dm crypt: add multi key capability dm crypt: add post iv call to iv generator dm crypt: use io thread for reads only if mempool exhausted dm crypt: scale to multiple cpus dm crypt: simplify compatible table output ...
-rw-r--r--Documentation/device-mapper/dm-crypt.txt7
-rw-r--r--Documentation/device-mapper/dm-raid.txt70
-rw-r--r--drivers/md/Kconfig24
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-crypt.c618
-rw-r--r--drivers/md/dm-delay.c2
-rw-r--r--drivers/md/dm-ioctl.c111
-rw-r--r--drivers/md/dm-kcopyd.c57
-rw-r--r--drivers/md/dm-log-userspace-base.c139
-rw-r--r--drivers/md/dm-log-userspace-transfer.c1
-rw-r--r--drivers/md/dm-log.c2
-rw-r--r--drivers/md/dm-mpath.c67
-rw-r--r--drivers/md/dm-raid.c697
-rw-r--r--drivers/md/dm-raid1.c19
-rw-r--r--drivers/md/dm-snap-persistent.c4
-rw-r--r--drivers/md/dm-snap.c62
-rw-r--r--drivers/md/dm-stripe.c27
-rw-r--r--drivers/md/dm-table.c19
-rw-r--r--drivers/md/dm.c23
-rw-r--r--include/linux/device-mapper.h12
-rw-r--r--include/linux/dm-ioctl.h14
-rw-r--r--include/linux/dm-log-userspace.h13
22 files changed, 1691 insertions, 298 deletions
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
index 524de926290d..59293ac4a5d0 100644
--- a/Documentation/device-mapper/dm-crypt.txt
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -8,7 +8,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
<cipher>
Encryption cipher and an optional IV generation mode.
- (In format cipher-chainmode-ivopts:ivmode).
+ (In format cipher[:keycount]-chainmode-ivopts:ivmode).
Examples:
des
aes-cbc-essiv:sha256
@@ -20,6 +20,11 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
Key used for encryption. It is encoded as a hexadecimal number.
You can only use key sizes that are valid for the selected cipher.
+<keycount>
+ Multi-key compatibility mode. You can define <keycount> keys and
+ then sectors are encrypted according to their offsets (sector 0 uses key0;
+ sector 1 uses key1 etc.). <keycount> must be a power of two.
+
<iv_offset>
The IV offset is a sector count that is added to the sector number
before creating the IV.
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
new file mode 100644
index 000000000000..33b6b7071ac8
--- /dev/null
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -0,0 +1,70 @@
+Device-mapper RAID (dm-raid) is a bridge from DM to MD. It
+provides a way to use device-mapper interfaces to access the MD RAID
+drivers.
+
+As with all device-mapper targets, the nominal public interfaces are the
+constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
+and STATUSTYPE_TABLE). The CTR table looks like the following:
+
+1: <s> <l> raid \
+2: <raid_type> <#raid_params> <raid_params> \
+3: <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
+
+Line 1 contains the standard first three arguments to any device-mapper
+target - the start, length, and target type fields. The target type in
+this case is "raid".
+
+Line 2 contains the arguments that define the particular raid
+type/personality/level, the required arguments for that raid type, and
+any optional arguments. Possible raid types include: raid4, raid5_la,
+raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is
+planned for the future.) The list of required and optional parameters
+is the same for all the current raid types. The required parameters are
+positional, while the optional parameters are given as key/value pairs.
+The possible parameters are as follows:
+ <chunk_size> Chunk size in sectors.
+ [[no]sync] Force/Prevent RAID initialization
+ [rebuild <idx>] Rebuild the drive indicated by the index
+ [daemon_sleep <ms>] Time between bitmap daemon work to clear bits
+ [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
+ [stripe_cache <sectors>] Stripe cache size for higher RAIDs
+
+Line 3 contains the list of devices that compose the array in
+metadata/data device pairs. If the metadata is stored separately, a '-'
+is given for the metadata device position. If a drive has failed or is
+missing at creation time, a '-' can be given for both the metadata and
+data drives for a given position.
+
+NB. Currently all metadata devices must be specified as '-'.
+
+Examples:
+# RAID4 - 4 data drives, 1 parity
+# No metadata devices specified to hold superblock/bitmap info
+# Chunk size of 1MiB
+# (Lines separated for easy reading)
+0 1960893648 raid \
+ raid4 1 2048 \
+ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# Chunk size of 1MiB, force RAID initialization,
+# min recovery rate at 20 kiB/sec/disk
+0 1960893648 raid \
+ raid4 4 2048 min_recovery_rate 20 sync\
+ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+Performing a 'dmsetup table' should display the CTR table used to
+construct the mapping (with possible reordering of optional
+parameters).
+
+Performing a 'dmsetup status' will yield information on the state and
+health of the array. The output is as follows:
+1: <s> <l> raid \
+2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
+
+Line 1 is standard DM output. Line 2 is best shown by example:
+ 0 1960893648 raid raid4 5 AAAAA 2/490221568
+Here we can see the RAID type is raid4, there are 5 devices - all of
+which are 'A'live, and the array is 2/490221568 complete with recovery.
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index bf1a95e31559..98d9ec85e0eb 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -240,6 +240,30 @@ config DM_MIRROR
Allow volume managers to mirror logical volumes, also
needed for live data migration tools such as 'pvmove'.
+config DM_RAID
+ tristate "RAID 4/5/6 target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ select MD_RAID456
+ select BLK_DEV_MD
+ ---help---
+ A dm target that supports RAID4, RAID5 and RAID6 mappings
+
+ A RAID-5 set of N drives with a capacity of C MB per drive provides
+ the capacity of C * (N - 1) MB, and protects against a failure
+ of a single drive. For a given sector (row) number, (N - 1) drives
+ contain data sectors, and one drive contains the parity protection.
+ For a RAID-4 set, the parity blocks are present on a single drive,
+ while a RAID-5 set distributes the parity across the drives in one
+ of the available parity distribution methods.
+
+ A RAID-6 set of N drives with a capacity of C MB per drive
+ provides the capacity of C * (N - 2) MB, and protects
+ against a failure of any two drives. For a given sector
+ (row) number, (N - 2) drives contain data sectors, and two
+ drives contains two independent redundancy syndromes. Like
+ RAID-5, RAID-6 distributes the syndromes across the drives
+ in one of the available parity distribution methods.
+
config DM_LOG_USERSPACE
tristate "Mirror userspace logging (EXPERIMENTAL)"
depends on DM_MIRROR && EXPERIMENTAL && NET
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 5e3aac41919d..d0138606c2e8 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
+obj-$(CONFIG_DM_RAID) += dm-raid.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d5b0e4c0e702..4e054bd91664 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -18,10 +18,14 @@
#include <linux/crypto.h>
#include <linux/workqueue.h>
#include <linux/backing-dev.h>
+#include <linux/percpu.h>
#include <asm/atomic.h>
#include <linux/scatterlist.h>
#include <asm/page.h>
#include <asm/unaligned.h>
+#include <crypto/hash.h>
+#include <crypto/md5.h>
+#include <crypto/algapi.h>
#include <linux/device-mapper.h>
@@ -63,6 +67,7 @@ struct dm_crypt_request {
struct convert_context *ctx;
struct scatterlist sg_in;
struct scatterlist sg_out;
+ sector_t iv_sector;
};
struct crypt_config;
@@ -73,11 +78,13 @@ struct crypt_iv_operations {
void (*dtr)(struct crypt_config *cc);
int (*init)(struct crypt_config *cc);
int (*wipe)(struct crypt_config *cc);
- int (*generator)(struct crypt_config *cc, u8 *iv, sector_t sector);
+ int (*generator)(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq);
+ int (*post)(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq);
};
struct iv_essiv_private {
- struct crypto_cipher *tfm;
struct crypto_hash *hash_tfm;
u8 *salt;
};
@@ -86,11 +93,32 @@ struct iv_benbi_private {
int shift;
};
+#define LMK_SEED_SIZE 64 /* hash + 0 */
+struct iv_lmk_private {
+ struct crypto_shash *hash_tfm;
+ u8 *seed;
+};
+
/*
* Crypt: maps a linear range of a block device
* and encrypts / decrypts at the same time.
*/
enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID };
+
+/*
+ * Duplicated per-CPU state for cipher.
+ */
+struct crypt_cpu {
+ struct ablkcipher_request *req;
+ /* ESSIV: struct crypto_cipher *essiv_tfm */
+ void *iv_private;
+ struct crypto_ablkcipher *tfms[0];
+};
+
+/*
+ * The fields in here must be read only after initialization,
+ * changing state should be in crypt_cpu.
+ */
struct crypt_config {
struct dm_dev *dev;
sector_t start;
@@ -108,17 +136,25 @@ struct crypt_config {
struct workqueue_struct *crypt_queue;
char *cipher;
- char *cipher_mode;
+ char *cipher_string;
struct crypt_iv_operations *iv_gen_ops;
union {
struct iv_essiv_private essiv;
struct iv_benbi_private benbi;
+ struct iv_lmk_private lmk;
} iv_gen_private;
sector_t iv_offset;
unsigned int iv_size;
/*
+ * Duplicated per cpu state. Access through
+ * per_cpu_ptr() only.
+ */
+ struct crypt_cpu __percpu *cpu;
+ unsigned tfms_count;
+
+ /*
* Layout of each crypto request:
*
* struct ablkcipher_request
@@ -132,11 +168,10 @@ struct crypt_config {
* correctly aligned.
*/
unsigned int dmreq_start;
- struct ablkcipher_request *req;
- struct crypto_ablkcipher *tfm;
unsigned long flags;
unsigned int key_size;
+ unsigned int key_parts;
u8 key[0];
};
@@ -148,6 +183,20 @@ static struct kmem_cache *_crypt_io_pool;
static void clone_init(struct dm_crypt_io *, struct bio *);
static void kcryptd_queue_crypt(struct dm_crypt_io *io);
+static u8 *iv_of_dmreq(struct crypt_config *cc, struct dm_crypt_request *dmreq);
+
+static struct crypt_cpu *this_crypt_config(struct crypt_config *cc)
+{
+ return this_cpu_ptr(cc->cpu);
+}
+
+/*
+ * Use this to access cipher attributes that are the same for each CPU.
+ */
+static struct crypto_ablkcipher *any_tfm(struct crypt_config *cc)
+{
+ return __this_cpu_ptr(cc->cpu)->tfms[0];
+}
/*
* Different IV generation algorithms:
@@ -168,23 +217,38 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io);
* null: the initial vector is always zero. Provides compatibility with
* obsolete loop_fish2 devices. Do not use for new devices.
*
+ * lmk: Compatible implementation of the block chaining mode used
+ * by the Loop-AES block device encryption system
+ * designed by Jari Ruusu. See http://loop-aes.sourceforge.net/
+ * It operates on full 512 byte sectors and uses CBC
+ * with an IV derived from the sector number, the data and
+ * optionally extra IV seed.
+ * This means that after decryption the first block
+ * of sector must be tweaked according to decrypted data.
+ * Loop-AES can use three encryption schemes:
+ * version 1: is plain aes-cbc mode
+ * version 2: uses 64 multikey scheme with lmk IV generator
+ * version 3: the same as version 2 with additional IV seed
+ * (it uses 65 keys, last key is used as IV seed)
+ *
* plumb: unimplemented, see:
* http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
*/
-static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
+static int crypt_iv_plain_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
{
memset(iv, 0, cc->iv_size);
- *(u32 *)iv = cpu_to_le32(sector & 0xffffffff);
+ *(u32 *)iv = cpu_to_le32(dmreq->iv_sector & 0xffffffff);
return 0;
}
static int crypt_iv_plain64_gen(struct crypt_config *cc, u8 *iv,
- sector_t sector)
+ struct dm_crypt_request *dmreq)
{
memset(iv, 0, cc->iv_size);
- *(u64 *)iv = cpu_to_le64(sector);
+ *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
return 0;
}
@@ -195,7 +259,8 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
struct hash_desc desc;
struct scatterlist sg;
- int err;
+ struct crypto_cipher *essiv_tfm;
+ int err, cpu;
sg_init_one(&sg, cc->key, cc->key_size);
desc.tfm = essiv->hash_tfm;
@@ -205,8 +270,16 @@ static int crypt_iv_essiv_init(struct crypt_config *cc)
if (err)
return err;
- return crypto_cipher_setkey(essiv->tfm, essiv->salt,
+ for_each_possible_cpu(cpu) {
+ essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private,
+
+ err = crypto_cipher_setkey(essiv_tfm, essiv->salt,
crypto_hash_digestsize(essiv->hash_tfm));
+ if (err)
+ return err;
+ }
+
+ return 0;
}
/* Wipe salt and reset key derived from volume key */
@@ -214,24 +287,76 @@ static int crypt_iv_essiv_wipe(struct crypt_config *cc)
{
struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
unsigned salt_size = crypto_hash_digestsize(essiv->hash_tfm);
+ struct crypto_cipher *essiv_tfm;
+ int cpu, r, err = 0;
memset(essiv->salt, 0, salt_size);
- return crypto_cipher_setkey(essiv->tfm, essiv->salt, salt_size);
+ for_each_possible_cpu(cpu) {
+ essiv_tfm = per_cpu_ptr(cc->cpu, cpu)->iv_private;
+ r = crypto_cipher_setkey(essiv_tfm, essiv->salt, salt_size);
+ if (r)
+ err = r;
+ }
+
+ return err;
+}
+
+/* Set up per cpu cipher state */
+static struct crypto_cipher *setup_essiv_cpu(struct crypt_config *cc,
+ struct dm_target *ti,
+ u8 *salt, unsigned saltsize)
+{
+ struct crypto_cipher *essiv_tfm;
+ int err;
+
+ /* Setup the essiv_tfm with the given salt */
+ essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(essiv_tfm)) {
+ ti->error = "Error allocating crypto tfm for ESSIV";
+ return essiv_tfm;
+ }
+
+ if (crypto_cipher_blocksize(essiv_tfm) !=
+ crypto_ablkcipher_ivsize(any_tfm(cc))) {
+ ti->error = "Block size of ESSIV cipher does "
+ "not match IV size of block cipher";
+ crypto_free_cipher(essiv_tfm);
+ return ERR_PTR(-EINVAL);
+ }
+
+ err = crypto_cipher_setkey(essiv_tfm, salt, saltsize);
+ if (err) {
+ ti->error = "Failed to set key for ESSIV cipher";
+ crypto_free_cipher(essiv_tfm);
+ return ERR_PTR(err);
+ }
+
+ return essiv_tfm;
}
static void crypt_iv_essiv_dtr(struct crypt_config *cc)
{
+ int cpu;
+ struct crypt_cpu *cpu_cc;
+ struct crypto_cipher *essiv_tfm;
struct iv_essiv_private *essiv = &cc->iv_gen_private.essiv;
- crypto_free_cipher(essiv->tfm);
- essiv->tfm = NULL;
-
crypto_free_hash(essiv->hash_tfm);
essiv->hash_tfm = NULL;
kzfree(essiv->salt);
essiv->salt = NULL;
+
+ for_each_possible_cpu(cpu) {
+ cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+ essiv_tfm = cpu_cc->iv_private;
+
+ if (essiv_tfm)
+ crypto_free_cipher(essiv_tfm);
+
+ cpu_cc->iv_private = NULL;
+ }
}
static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
@@ -240,7 +365,7 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
struct crypto_cipher *essiv_tfm = NULL;
struct crypto_hash *hash_tfm = NULL;
u8 *salt = NULL;
- int err;
+ int err, cpu;
if (!opts) {
ti->error = "Digest algorithm missing for ESSIV mode";
@@ -262,48 +387,44 @@ static int crypt_iv_essiv_ctr(struct crypt_config *cc, struct dm_target *ti,
goto bad;
}
- /* Allocate essiv_tfm */
- essiv_tfm = crypto_alloc_cipher(cc->cipher, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(essiv_tfm)) {
- ti->error = "Error allocating crypto tfm for ESSIV";
- err = PTR_ERR(essiv_tfm);
- goto bad;
- }
- if (crypto_cipher_blocksize(essiv_tfm) !=
- crypto_ablkcipher_ivsize(cc->tfm)) {
- ti->error = "Block size of ESSIV cipher does "
- "not match IV size of block cipher";
- err = -EINVAL;
- goto bad;
- }
-
cc->iv_gen_private.essiv.salt = salt;
- cc->iv_gen_private.essiv.tfm = essiv_tfm;
cc->iv_gen_private.essiv.hash_tfm = hash_tfm;
+ for_each_possible_cpu(cpu) {
+ essiv_tfm = setup_essiv_cpu(cc, ti, salt,
+ crypto_hash_digestsize(hash_tfm));
+ if (IS_ERR(essiv_tfm)) {
+ crypt_iv_essiv_dtr(cc);
+ return PTR_ERR(essiv_tfm);
+ }
+ per_cpu_ptr(cc->cpu, cpu)->iv_private = essiv_tfm;
+ }
+
return 0;
bad:
- if (essiv_tfm && !IS_ERR(essiv_tfm))
- crypto_free_cipher(essiv_tfm);
if (hash_tfm && !IS_ERR(hash_tfm))
crypto_free_hash(hash_tfm);
kfree(salt);
return err;
}
-static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
+static int crypt_iv_essiv_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
{
+ struct crypto_cipher *essiv_tfm = this_crypt_config(cc)->iv_private;
+
memset(iv, 0, cc->iv_size);
- *(u64 *)iv = cpu_to_le64(sector);
- crypto_cipher_encrypt_one(cc->iv_gen_private.essiv.tfm, iv, iv);
+ *(u64 *)iv = cpu_to_le64(dmreq->iv_sector);
+ crypto_cipher_encrypt_one(essiv_tfm, iv, iv);
+
return 0;
}
static int crypt_iv_benbi_ctr(struct crypt_config *cc, struct dm_target *ti,
const char *opts)
{
- unsigned bs = crypto_ablkcipher_blocksize(cc->tfm);
+ unsigned bs = crypto_ablkcipher_blocksize(any_tfm(cc));
int log = ilog2(bs);
/* we need to calculate how far we must shift the sector count
@@ -328,25 +449,177 @@ static void crypt_iv_benbi_dtr(struct crypt_config *cc)
{
}
-static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
+static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
{
__be64 val;
memset(iv, 0, cc->iv_size - sizeof(u64)); /* rest is cleared below */
- val = cpu_to_be64(((u64)sector << cc->iv_gen_private.benbi.shift) + 1);
+ val = cpu_to_be64(((u64)dmreq->iv_sector << cc->iv_gen_private.benbi.shift) + 1);
put_unaligned(val, (__be64 *)(iv + cc->iv_size - sizeof(u64)));
return 0;
}
-static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
+static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
{
memset(iv, 0, cc->iv_size);
return 0;
}
+static void crypt_iv_lmk_dtr(struct crypt_config *cc)
+{
+ struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
+
+ if (lmk->hash_tfm && !IS_ERR(lmk->hash_tfm))
+ crypto_free_shash(lmk->hash_tfm);
+ lmk->hash_tfm = NULL;
+
+ kzfree(lmk->seed);
+ lmk->seed = NULL;
+}
+
+static int crypt_iv_lmk_ctr(struct crypt_config *cc, struct dm_target *ti,
+ const char *opts)
+{
+ struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
+
+ lmk->hash_tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(lmk->hash_tfm)) {
+ ti->error = "Error initializing LMK hash";
+ return PTR_ERR(lmk->hash_tfm);
+ }
+
+ /* No seed in LMK version 2 */
+ if (cc->key_parts == cc->tfms_count) {
+ lmk->seed = NULL;
+ return 0;
+ }
+
+ lmk->seed = kzalloc(LMK_SEED_SIZE, GFP_KERNEL);
+ if (!lmk->seed) {
+ crypt_iv_lmk_dtr(cc);
+ ti->error = "Error kmallocing seed storage in LMK";
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int crypt_iv_lmk_init(struct crypt_config *cc)
+{
+ struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
+ int subkey_size = cc->key_size / cc->key_parts;
+
+ /* LMK seed is on the position of LMK_KEYS + 1 key */
+ if (lmk->seed)
+ memcpy(lmk->seed, cc->key + (cc->tfms_count * subkey_size),
+ crypto_shash_digestsize(lmk->hash_tfm));
+
+ return 0;
+}
+
+static int crypt_iv_lmk_wipe(struct crypt_config *cc)
+{
+ struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
+
+ if (lmk->seed)
+ memset(lmk->seed, 0, LMK_SEED_SIZE);
+
+ return 0;
+}
+
+static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq,
+ u8 *data)
+{
+ struct iv_lmk_private *lmk = &cc->iv_gen_private.lmk;
+ struct {
+ struct shash_desc desc;
+ char ctx[crypto_shash_descsize(lmk->hash_tfm)];
+ } sdesc;
+ struct md5_state md5state;
+ u32 buf[4];
+ int i, r;
+
+ sdesc.desc.tfm = lmk->hash_tfm;
+ sdesc.desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ r = crypto_shash_init(&sdesc.desc);
+ if (r)
+ return r;
+
+ if (lmk->seed) {
+ r = crypto_shash_update(&sdesc.desc, lmk->seed, LMK_SEED_SIZE);
+ if (r)
+ return r;
+ }
+
+ /* Sector is always 512B, block size 16, add data of blocks 1-31 */
+ r = crypto_shash_update(&sdesc.desc, data + 16, 16 * 31);
+ if (r)
+ return r;
+
+ /* Sector is cropped to 56 bits here */
+ buf[0] = cpu_to_le32(dmreq->iv_sector & 0xFFFFFFFF);
+ buf[1] = cpu_to_le32((((u64)dmreq->iv_sector >> 32) & 0x00FFFFFF) | 0x80000000);
+ buf[2] = cpu_to_le32(4024);
+ buf[3] = 0;
+ r = crypto_shash_update(&sdesc.desc, (u8 *)buf, sizeof(buf));
+ if (r)
+ return r;
+
+ /* No MD5 padding here */
+ r = crypto_shash_export(&sdesc.desc, &md5state);
+ if (r)
+ return r;
+
+ for (i = 0; i < MD5_HASH_WORDS; i++)
+ __cpu_to_le32s(&md5state.hash[i]);
+ memcpy(iv, &md5state.hash, cc->iv_size);
+
+ return 0;
+}
+
+static int crypt_iv_lmk_gen(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
+{
+ u8 *src;
+ int r = 0;
+
+ if (bio_data_dir(dmreq->ctx->bio_in) == WRITE) {
+ src = kmap_atomic(sg_page(&dmreq->sg_in), KM_USER0);
+ r = crypt_iv_lmk_one(cc, iv, dmreq, src + dmreq->sg_in.offset);
+ kunmap_atomic(src, KM_USER0);
+ } else
+ memset(iv, 0, cc->iv_size);
+
+ return r;
+}
+
+static int crypt_iv_lmk_post(struct crypt_config *cc, u8 *iv,
+ struct dm_crypt_request *dmreq)
+{
+ u8 *dst;
+ int r;
+
+ if (bio_data_dir(dmreq->ctx->bio_in) == WRITE)
+ return 0;
+
+ dst = kmap_atomic(sg_page(&dmreq->sg_out), KM_USER0);
+ r = crypt_iv_lmk_one(cc, iv, dmreq, dst + dmreq->sg_out.offset);
+
+ /* Tweak the first block of plaintext sector */
+ if (!r)
+ crypto_xor(dst + dmreq->sg_out.offset, iv, cc->iv_size);
+
+ kunmap_atomic(dst, KM_USER0);
+ return r;
+}
+
static struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
@@ -373,6 +646,15 @@ static struct crypt_iv_operations crypt_iv_null_ops = {
.generator = crypt_iv_null_gen
};
+static struct crypt_iv_operations crypt_iv_lmk_ops = {
+ .ctr = crypt_iv_lmk_ctr,
+ .dtr = crypt_iv_lmk_dtr,
+ .init = crypt_iv_lmk_init,
+ .wipe = crypt_iv_lmk_wipe,
+ .generator = crypt_iv_lmk_gen,
+ .post = crypt_iv_lmk_post
+};
+
static void crypt_convert_init(struct crypt_config *cc,
struct convert_context *ctx,
struct bio *bio_out, struct bio *bio_in,
@@ -400,6 +682,13 @@ static struct ablkcipher_request *req_of_dmreq(struct crypt_config *cc,
return (struct ablkcipher_request *)((char *)dmreq - cc->dmreq_start);
}
+static u8 *iv_of_dmreq(struct crypt_config *cc,
+ struct dm_crypt_request *dmreq)
+{
+ return (u8 *)ALIGN((unsigned long)(dmreq + 1),
+ crypto_ablkcipher_alignmask(any_tfm(cc)) + 1);
+}
+
static int crypt_convert_block(struct crypt_config *cc,
struct convert_context *ctx,
struct ablkcipher_request *req)
@@ -411,9 +700,9 @@ static int crypt_convert_block(struct crypt_config *cc,
int r = 0;
dmreq = dmreq_of_req(cc, req);
- iv = (u8 *)ALIGN((unsigned long)(dmreq + 1),
- crypto_ablkcipher_alignmask(cc->tfm) + 1);
+ iv = iv_of_dmreq(cc, dmreq);
+ dmreq->iv_sector = ctx->sector;
dmreq->ctx = ctx;
sg_init_table(&dmreq->sg_in, 1);
sg_set_page(&dmreq->sg_in, bv_in->bv_page, 1 << SECTOR_SHIFT,
@@ -436,7 +725,7 @@ static int crypt_convert_block(struct crypt_config *cc,
}
if (cc->iv_gen_ops) {
- r = cc->iv_gen_ops->generator(cc, iv, ctx->sector);
+ r = cc->iv_gen_ops->generator(cc, iv, dmreq);
if (r < 0)
return r;
}
@@ -449,21 +738,28 @@ static int crypt_convert_block(struct crypt_config *cc,
else
r = crypto_ablkcipher_decrypt(req);
+ if (!r && cc->iv_gen_ops && cc->iv_gen_ops->post)
+ r = cc->iv_gen_ops->post(cc, iv, dmreq);
+
return r;
}
static void kcryptd_async_done(struct crypto_async_request *async_req,
int error);
+
static void crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
{
- if (!cc->req)
- cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
- ablkcipher_request_set_tfm(cc->req, cc->tfm);
- ablkcipher_request_set_callback(cc->req, CRYPTO_TFM_REQ_MAY_BACKLOG |
- CRYPTO_TFM_REQ_MAY_SLEEP,
- kcryptd_async_done,
- dmreq_of_req(cc, cc->req));
+ struct crypt_cpu *this_cc = this_crypt_config(cc);
+ unsigned key_index = ctx->sector & (cc->tfms_count - 1);
+
+ if (!this_cc->req)
+ this_cc->req = mempool_alloc(cc->req_pool, GFP_NOIO);
+
+ ablkcipher_request_set_tfm(this_cc->req, this_cc->tfms[key_index]);
+ ablkcipher_request_set_callback(this_cc->req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ kcryptd_async_done, dmreq_of_req(cc, this_cc->req));
}
/*
@@ -472,6 +768,7 @@ static void crypt_alloc_req(struct crypt_config *cc,
static int crypt_convert(struct crypt_config *cc,
struct convert_context *ctx)
{
+ struct crypt_cpu *this_cc = this_crypt_config(cc);
int r;
atomic_set(&ctx->pending, 1);
@@ -483,7 +780,7 @@ static int crypt_convert(struct crypt_config *cc,
atomic_inc(&ctx->pending);
- r = crypt_convert_block(cc, ctx, cc->req);
+ r = crypt_convert_block(cc, ctx, this_cc->req);
switch (r) {
/* async */
@@ -492,7 +789,7 @@ static int crypt_convert(struct crypt_config *cc,
INIT_COMPLETION(ctx->restart);
/* fall through*/
case -EINPROGRESS:
- cc->req = NULL;
+ this_cc->req = NULL;
ctx->sector++;
continue;
@@ -651,6 +948,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
* They must be separated as otherwise the final stages could be
* starved by new requests which can block in the first stages due
* to memory allocation.
+ *
+ * The work is done per CPU global for all dm-crypt instances.
+ * They should not depend on each other and do not block.
*/
static void crypt_endio(struct bio *clone, int error)
{
@@ -691,26 +991,30 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
clone->bi_destructor = dm_crypt_bio_destructor;
}
-static void kcryptd_io_read(struct dm_crypt_io *io)
+static void kcryptd_unplug(struct crypt_config *cc)
+{
+ blk_unplug(bdev_get_queue(cc->dev->bdev));
+}
+
+static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
{
struct crypt_config *cc = io->target->private;
struct bio *base_bio = io->base_bio;
struct bio *clone;
- crypt_inc_pending(io);
-
/*
* The block layer might modify the bvec array, so always
* copy the required bvecs because we need the original
* one in order to decrypt the whole bio data *afterwards*.
*/
- clone = bio_alloc_bioset(GFP_NOIO, bio_segments(base_bio), cc->bs);
- if (unlikely(!clone)) {
- io->error = -ENOMEM;
- crypt_dec_pending(io);
- return;
+ clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs);
+ if (!clone) {
+ kcryptd_unplug(cc);
+ return 1;
}
+ crypt_inc_pending(io);
+
clone_init(io, clone);
clone->bi_idx = 0;
clone->bi_vcnt = bio_segments(base_bio);
@@ -720,6 +1024,7 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
sizeof(struct bio_vec) * clone->bi_vcnt);
generic_make_request(clone);
+ return 0;
}
static void kcryptd_io_write(struct dm_crypt_io *io)
@@ -732,9 +1037,12 @@ static void kcryptd_io(struct work_struct *work)
{
struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
- if (bio_data_dir(io->base_bio) == READ)
- kcryptd_io_read(io);
- else
+ if (bio_data_dir(io->base_bio) == READ) {
+ crypt_inc_pending(io);
+ if (kcryptd_io_read(io, GFP_NOIO))
+ io->error = -ENOMEM;
+ crypt_dec_pending(io);
+ } else
kcryptd_io_write(io);
}
@@ -901,6 +1209,9 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
return;
}
+ if (!error && cc->iv_gen_ops && cc->iv_gen_ops->post)
+ error = cc->iv_gen_ops->post(cc, iv_of_dmreq(cc, dmreq), dmreq);
+
mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool);
if (!atomic_dec_and_test(&ctx->pending))
@@ -971,34 +1282,84 @@ static void crypt_encode_key(char *hex, u8 *key, unsigned int size)
}
}
-static int crypt_set_key(struct crypt_config *cc, char *key)
+static void crypt_free_tfms(struct crypt_config *cc, int cpu)
{
- unsigned key_size = strlen(key) >> 1;
+ struct crypt_cpu *cpu_cc = per_cpu_ptr(cc->cpu, cpu);
+ unsigned i;
- if (cc->key_size && cc->key_size != key_size)
+ for (i = 0; i < cc->tfms_count; i++)
+ if (cpu_cc->tfms[i] && !IS_ERR(cpu_cc->tfms[i])) {
+ crypto_free_ablkcipher(cpu_cc->tfms[i]);
+