summaryrefslogtreecommitdiffstats
path: root/arch/arm64
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-02-23 09:54:19 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2017-02-23 09:54:19 -0800
commit5bcbe22ca47da04cda3a858cef67f55b550c1d13 (patch)
tree49bd61e32eb2d652085a49182436322a3e0e9840 /arch/arm64
parent1db934a5b77a9e37c4742c704fde6af233187a98 (diff)
parent12cb3a1c4184f891d965d1f39f8cfcc9ef617647 (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto update from Herbert Xu: "API: - Try to catch hash output overrun in testmgr - Introduce walksize attribute for batched walking - Make crypto_xor() and crypto_inc() alignment agnostic Algorithms: - Add time-invariant AES algorithm - Add standalone CBCMAC algorithm Drivers: - Add NEON acclerated chacha20 on ARM/ARM64 - Expose AES-CTR as synchronous skcipher on ARM64 - Add scalar AES implementation on ARM64 - Improve scalar AES implementation on ARM - Improve NEON AES implementation on ARM/ARM64 - Merge CRC32 and PMULL instruction based drivers on ARM64 - Add NEON acclerated CBCMAC/CMAC/XCBC AES on ARM64 - Add IPsec AUTHENC implementation in atmel - Add Support for Octeon-tx CPT Engine - Add Broadcom SPU driver - Add MediaTek driver" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (142 commits) crypto: xts - Add ECB dependency crypto: cavium - switch to pci_alloc_irq_vectors crypto: cavium - switch to pci_alloc_irq_vectors crypto: cavium - remove dead MSI-X related define crypto: brcm - Avoid double free in ahash_finup() crypto: cavium - fix Kconfig dependencies crypto: cavium - cpt_bind_vq_to_grp could return an error code crypto: doc - fix typo hwrng: omap - update Kconfig help description crypto: ccm - drop unnecessary minimum 32-bit alignment crypto: ccm - honour alignmask of subordinate MAC cipher crypto: caam - fix state buffer DMA (un)mapping crypto: caam - abstract ahash request double buffering crypto: caam - fix error path for ctx_dma mapping failure crypto: caam - fix DMA API leaks for multiple setkey() calls crypto: caam - don't dma_map key for hash algorithms crypto: caam - use dma_map_sg() return code crypto: caam - replace sg_count() with sg_nents_for_len() crypto: caam - check sg_count() return value crypto: caam - fix HW S/G in ablkcipher_giv_edesc_alloc() ..
Diffstat (limited to 'arch/arm64')
-rw-r--r--arch/arm64/configs/defconfig1
-rw-r--r--arch/arm64/crypto/Kconfig24
-rw-r--r--arch/arm64/crypto/Makefile13
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c1
-rw-r--r--arch/arm64/crypto/aes-cipher-core.S110
-rw-r--r--arch/arm64/crypto/aes-cipher-glue.c69
-rw-r--r--arch/arm64/crypto/aes-glue.c281
-rw-r--r--arch/arm64/crypto/aes-modes.S37
-rw-r--r--arch/arm64/crypto/aes-neon.S235
-rw-r--r--arch/arm64/crypto/aes-neonbs-core.S972
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c439
-rw-r--r--arch/arm64/crypto/chacha20-neon-core.S450
-rw-r--r--arch/arm64/crypto/chacha20-neon-glue.c126
-rw-r--r--arch/arm64/crypto/crc32-arm64.c290
-rw-r--r--arch/arm64/crypto/crc32-ce-glue.c49
15 files changed, 2632 insertions, 465 deletions
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 33b744d54739..6fc6f5a2a6e5 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -516,4 +516,3 @@ CONFIG_CRYPTO_GHASH_ARM64_CE=y
CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
# CONFIG_CRYPTO_AES_ARM64_NEON_BLK is not set
-CONFIG_CRYPTO_CRC32_ARM64=y
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 450a85df041a..d92293747d63 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -37,10 +37,14 @@ config CRYPTO_CRCT10DIF_ARM64_CE
select CRYPTO_HASH
config CRYPTO_CRC32_ARM64_CE
- tristate "CRC32 and CRC32C digest algorithms using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC32
+ tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
+ depends on CRC32
select CRYPTO_HASH
+config CRYPTO_AES_ARM64
+ tristate "AES core cipher using scalar instructions"
+ select CRYPTO_AES
+
config CRYPTO_AES_ARM64_CE
tristate "AES core cipher using ARMv8 Crypto Extensions"
depends on ARM64 && KERNEL_MODE_NEON
@@ -67,9 +71,17 @@ config CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES
select CRYPTO_SIMD
-config CRYPTO_CRC32_ARM64
- tristate "CRC32 and CRC32C using optional ARMv8 instructions"
- depends on ARM64
- select CRYPTO_HASH
+config CRYPTO_CHACHA20_NEON
+ tristate "NEON accelerated ChaCha20 symmetric cipher"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_CHACHA20
+
+config CRYPTO_AES_ARM64_BS
+ tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_BLKCIPHER
+ select CRYPTO_AES_ARM64_NEON_BLK
+ select CRYPTO_SIMD
endif
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index aa8888d7b744..b5edc5918c28 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -41,15 +41,20 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
sha512-arm64-y := sha512-glue.o sha512-core.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
+aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o
+
+obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o
+aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
+
AFLAGS_aes-ce.o := -DINTERLEAVE=4
AFLAGS_aes-neon.o := -DINTERLEAVE=4
CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
-obj-$(CONFIG_CRYPTO_CRC32_ARM64) += crc32-arm64.o
-
-CFLAGS_crc32-arm64.o := -mcpu=generic+crc
-
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index cc5515dac74a..6a7dbc7c83a6 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -258,7 +258,6 @@ static struct aead_alg ccm_aes_alg = {
.cra_priority = 300,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.ivsize = AES_BLOCK_SIZE,
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
new file mode 100644
index 000000000000..f2f9cc519309
--- /dev/null
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -0,0 +1,110 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+ .text
+
+ rk .req x0
+ out .req x1
+ in .req x2
+ rounds .req x3
+ tt .req x4
+ lt .req x2
+
+ .macro __pair, enc, reg0, reg1, in0, in1e, in1d, shift
+ ubfx \reg0, \in0, #\shift, #8
+ .if \enc
+ ubfx \reg1, \in1e, #\shift, #8
+ .else
+ ubfx \reg1, \in1d, #\shift, #8
+ .endif
+ ldr \reg0, [tt, \reg0, uxtw #2]
+ ldr \reg1, [tt, \reg1, uxtw #2]
+ .endm
+
+ .macro __hround, out0, out1, in0, in1, in2, in3, t0, t1, enc
+ ldp \out0, \out1, [rk], #8
+
+ __pair \enc, w13, w14, \in0, \in1, \in3, 0
+ __pair \enc, w15, w16, \in1, \in2, \in0, 8
+ __pair \enc, w17, w18, \in2, \in3, \in1, 16
+ __pair \enc, \t0, \t1, \in3, \in0, \in2, 24
+
+ eor \out0, \out0, w13
+ eor \out1, \out1, w14
+ eor \out0, \out0, w15, ror #24
+ eor \out1, \out1, w16, ror #24
+ eor \out0, \out0, w17, ror #16
+ eor \out1, \out1, w18, ror #16
+ eor \out0, \out0, \t0, ror #8
+ eor \out1, \out1, \t1, ror #8
+ .endm
+
+ .macro fround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1
+ __hround \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1
+ .endm
+
+ .macro iround, out0, out1, out2, out3, in0, in1, in2, in3
+ __hround \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0
+ __hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0
+ .endm
+
+ .macro do_crypt, round, ttab, ltab
+ ldp w5, w6, [in]
+ ldp w7, w8, [in, #8]
+ ldp w9, w10, [rk], #16
+ ldp w11, w12, [rk, #-8]
+
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+
+ eor w5, w5, w9
+ eor w6, w6, w10
+ eor w7, w7, w11
+ eor w8, w8, w12
+
+ adr_l tt, \ttab
+ adr_l lt, \ltab
+
+ tbnz rounds, #1, 1f
+
+0: \round w9, w10, w11, w12, w5, w6, w7, w8
+ \round w5, w6, w7, w8, w9, w10, w11, w12
+
+1: subs rounds, rounds, #4
+ \round w9, w10, w11, w12, w5, w6, w7, w8
+ csel tt, tt, lt, hi
+ \round w5, w6, w7, w8, w9, w10, w11, w12
+ b.hi 0b
+
+CPU_BE( rev w5, w5 )
+CPU_BE( rev w6, w6 )
+CPU_BE( rev w7, w7 )
+CPU_BE( rev w8, w8 )
+
+ stp w5, w6, [out]
+ stp w7, w8, [out, #8]
+ ret
+ .endm
+
+ .align 5
+ENTRY(__aes_arm64_encrypt)
+ do_crypt fround, crypto_ft_tab, crypto_fl_tab
+ENDPROC(__aes_arm64_encrypt)
+
+ .align 5
+ENTRY(__aes_arm64_decrypt)
+ do_crypt iround, crypto_it_tab, crypto_il_tab
+ENDPROC(__aes_arm64_decrypt)
diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c
new file mode 100644
index 000000000000..7288e7cbebff
--- /dev/null
+++ b/arch/arm64/crypto/aes-cipher-glue.c
@@ -0,0 +1,69 @@
+/*
+ * Scalar AES core transform
+ *
+ * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+EXPORT_SYMBOL(__aes_arm64_encrypt);
+
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+EXPORT_SYMBOL(__aes_arm64_decrypt);
+
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
+}
+
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+{
+ struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
+ int rounds = 6 + ctx->key_length / 4;
+
+ __aes_arm64_decrypt(ctx->key_dec, out, in, rounds);
+}
+
+static struct crypto_alg aes_alg = {
+ .cra_name = "aes",
+ .cra_driver_name = "aes-arm64",
+ .cra_priority = 200,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
+ .cra_module = THIS_MODULE,
+
+ .cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
+ .cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
+ .cra_cipher.cia_setkey = crypto_aes_set_key,
+ .cra_cipher.cia_encrypt = aes_encrypt,
+ .cra_cipher.cia_decrypt = aes_decrypt
+};
+
+static int __init aes_init(void)
+{
+ return crypto_register_alg(&aes_alg);
+}
+
+static void __exit aes_fini(void)
+{
+ crypto_unregister_alg(&aes_alg);
+}
+
+module_init(aes_init);
+module_exit(aes_fini);
+
+MODULE_DESCRIPTION("Scalar AES cipher for arm64");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("aes");
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 4e3f8adb1793..bcf596b0197e 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-glue.c - wrapper code for ARMv8 AES
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -11,6 +11,7 @@
#include <asm/neon.h>
#include <asm/hwcap.h>
#include <crypto/aes.h>
+#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
@@ -31,6 +32,7 @@
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
+#define aes_mac_update ce_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
@@ -44,11 +46,15 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
+#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
+MODULE_ALIAS_CRYPTO("cmac(aes)");
+MODULE_ALIAS_CRYPTO("xcbc(aes)");
+MODULE_ALIAS_CRYPTO("cbcmac(aes)");
#endif
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
@@ -75,11 +81,25 @@ asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
int rounds, int blocks, u8 const rk2[], u8 iv[],
int first);
+asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ int blocks, u8 dg[], int enc_before,
+ int enc_after);
+
struct crypto_aes_xts_ctx {
struct crypto_aes_ctx key1;
struct crypto_aes_ctx __aligned(8) key2;
};
+struct mac_tfm_ctx {
+ struct crypto_aes_ctx key;
+ u8 __aligned(8) consts[];
+};
+
+struct mac_desc_ctx {
+ unsigned int len;
+ u8 dg[AES_BLOCK_SIZE];
+};
+
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
@@ -215,14 +235,15 @@ static int ctr_encrypt(struct skcipher_request *req)
u8 *tsrc = walk.src.virt.addr;
/*
- * Minimum alignment is 8 bytes, so if nbytes is <= 8, we need
- * to tell aes_ctr_encrypt() to only read half a block.
+ * Tell aes_ctr_encrypt() to process a tail block.
*/
- blocks = (nbytes <= 8) ? -1 : 1;
+ blocks = -1;
- aes_ctr_encrypt(tail, tsrc, (u8 *)ctx->key_enc, rounds,
+ aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
blocks, walk.iv, first);
- memcpy(tdst, tail, nbytes);
+ if (tdst != tsrc)
+ memcpy(tdst, tsrc, nbytes);
+ crypto_xor(tdst, tail, nbytes);
err = skcipher_walk_done(&walk, 0);
}
kernel_neon_end();
@@ -282,7 +303,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -298,7 +318,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -315,7 +334,22 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct crypto_aes_ctx),
- .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
+ .setkey = skcipher_aes_setkey,
+ .encrypt = ctr_encrypt,
+ .decrypt = ctr_encrypt,
+}, {
+ .base = {
+ .cra_name = "ctr(aes)",
+ .cra_driver_name = "ctr-aes-" MODE,
+ .cra_priority = PRIO - 1,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct crypto_aes_ctx),
.cra_module = THIS_MODULE,
},
.min_keysize = AES_MIN_KEY_SIZE,
@@ -333,7 +367,6 @@ static struct skcipher_alg aes_algs[] = { {
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = AES_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct crypto_aes_xts_ctx),
- .cra_alignmask = 7,
.cra_module = THIS_MODULE,
},
.min_keysize = 2 * AES_MIN_KEY_SIZE,
@@ -344,15 +377,228 @@ static struct skcipher_alg aes_algs[] = { {
.decrypt = xts_decrypt,
} };
+static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ int err;
+
+ err = aes_expandkey(&ctx->key, in_key, key_len);
+ if (err)
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+ return err;
+}
+
+static void cmac_gf128_mul_by_x(be128 *y, const be128 *x)
+{
+ u64 a = be64_to_cpu(x->a);
+ u64 b = be64_to_cpu(x->b);
+
+ y->a = cpu_to_be64((a << 1) | (b >> 63));
+ y->b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+}
+
+static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ be128 *consts = (be128 *)ctx->consts;
+ u8 *rk = (u8 *)ctx->key.key_enc;
+ int rounds = 6 + key_len / 4;
+ int err;
+
+ err = cbcmac_setkey(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ /* encrypt the zero vector */
+ kernel_neon_begin();
+ aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1, 1);
+ kernel_neon_end();
+
+ cmac_gf128_mul_by_x(consts, consts);
+ cmac_gf128_mul_by_x(consts + 1, consts);
+
+ return 0;
+}
+
+static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ static u8 const ks[3][AES_BLOCK_SIZE] = {
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x1 },
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x2 },
+ { [0 ... AES_BLOCK_SIZE - 1] = 0x3 },
+ };
+
+ struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+ u8 *rk = (u8 *)ctx->key.key_enc;
+ int rounds = 6 + key_len / 4;
+ u8 key[AES_BLOCK_SIZE];
+ int err;
+
+ err = cbcmac_setkey(tfm, in_key, key_len);
+ if (err)
+ return err;
+
+ kernel_neon_begin();
+ aes_ecb_encrypt(key, ks[0], rk, rounds, 1, 1);
+ aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2, 0);
+ kernel_neon_end();
+
+ return cbcmac_setkey(tfm, key, sizeof(key));
+}
+
+static int mac_init(struct shash_desc *desc)
+{
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+ memset(ctx->dg, 0, AES_BLOCK_SIZE);
+ ctx->len = 0;
+
+ return 0;
+}
+
+static int mac_update(struct shash_desc *desc, const u8 *p, unsigned int len)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+
+ while (len > 0) {
+ unsigned int l;
+
+ if ((ctx->len % AES_BLOCK_SIZE) == 0 &&
+ (ctx->len + len) > AES_BLOCK_SIZE) {
+
+ int blocks = len / AES_BLOCK_SIZE;
+
+ len %= AES_BLOCK_SIZE;
+
+ kernel_neon_begin();
+ aes_mac_update(p, tctx->key.key_enc, rounds, blocks,
+ ctx->dg, (ctx->len != 0), (len != 0));
+ kernel_neon_end();
+
+ p += blocks * AES_BLOCK_SIZE;
+
+ if (!len) {
+ ctx->len = AES_BLOCK_SIZE;
+ break;
+ }
+ ctx->len = 0;
+ }
+
+ l = min(len, AES_BLOCK_SIZE - ctx->len);
+
+ if (l <= AES_BLOCK_SIZE) {
+ crypto_xor(ctx->dg + ctx->len, p, l);
+ ctx->len += l;
+ len -= l;
+ p += l;
+ }
+ }
+
+ return 0;
+}
+
+static int cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+
+ kernel_neon_begin();
+ aes_mac_update(NULL, tctx->key.key_enc, rounds, 0, ctx->dg, 1, 0);
+ kernel_neon_end();
+
+ memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+ return 0;
+}
+
+static int cmac_final(struct shash_desc *desc, u8 *out)
+{
+ struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
+ int rounds = 6 + tctx->key.key_length / 4;
+ u8 *consts = tctx->consts;
+
+ if (ctx->len != AES_BLOCK_SIZE) {
+ ctx->dg[ctx->len] ^= 0x80;
+ consts += AES_BLOCK_SIZE;
+ }
+
+ kernel_neon_begin();
+ aes_mac_update(consts, tctx->key.key_enc, rounds, 1, ctx->dg, 0, 1);
+ kernel_neon_end();
+
+ memcpy(out, ctx->dg, AES_BLOCK_SIZE);
+
+ return 0;
+}
+
+static struct shash_alg mac_algs[] = { {
+ .base.cra_name = "cmac(aes)",
+ .base.cra_driver_name = "cmac-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
+ 2 * AES_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cmac_final,
+ .setkey = cmac_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+}, {
+ .base.cra_name = "xcbc(aes)",
+ .base.cra_driver_name = "xcbc-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
+ 2 * AES_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cmac_final,
+ .setkey = xcbc_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+}, {
+ .base.cra_name = "cbcmac(aes)",
+ .base.cra_driver_name = "cbcmac-aes-" MODE,
+ .base.cra_priority = PRIO,
+ .base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = AES_BLOCK_SIZE,
+ .init = mac_init,
+ .update = mac_update,
+ .final = cbcmac_final,
+ .setkey = cbcmac_setkey,
+ .descsize = sizeof(struct mac_desc_ctx),
+} };
+
static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
static void aes_exit(void)
{
int i;
- for (i = 0; i < ARRAY_SIZE(aes_simd_algs) && aes_simd_algs[i]; i++)
- simd_skcipher_free(aes_simd_algs[i]);
+ for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++)
+ if (aes_simd_algs[i])
+ simd_skcipher_free(aes_simd_algs[i]);
+ crypto_unregister_shashes(mac_algs, ARRAY_SIZE(mac_algs));
crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
}
@@ -369,7 +615,14 @@ static int __init aes_init(void)
if (err)
return err;
+ err = crypto_register_shashes(mac_algs, ARRAY_SIZE(mac_algs));
+ if (err)
+ goto unregister_ciphers;
+
for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+ if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
+ continue;
+
algname = aes_algs[i].base.cra_name + 2;
drvname = aes_algs[i].base.cra_driver_name + 2;
basename = aes_algs[i].base.cra_driver_name;
@@ -385,6 +638,8 @@ static int __init aes_init(void)
unregister_simds:
aes_exit();
+unregister_ciphers:
+ crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
return err;
}
@@ -392,5 +647,7 @@ unregister_simds:
module_cpu_feature_match(AES, aes_init);
#else
module_init(aes_init);
+EXPORT_SYMBOL(neon_aes_ecb_encrypt);
+EXPORT_SYMBOL(neon_aes_cbc_encrypt);
#endif
module_exit(aes_exit);
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 838dad5c209f..2674d43d1384 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -337,7 +337,7 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrcarrydone:
subs w4, w4, #1
- bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
+ bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
@@ -348,10 +348,8 @@ AES_ENTRY(aes_ctr_encrypt)
FRAME_POP
ret
-.Lctrhalfblock:
- ld1 {v3.8b}, [x1]
- eor v3.8b, v0.8b, v3.8b
- st1 {v3.8b}, [x0]
+.Lctrtailblock:
+ st1 {v0.16b}, [x0]
FRAME_POP
ret
@@ -527,3 +525,30 @@ AES_ENTRY(aes_xts_decrypt)
FRAME_POP
ret
AES_ENDPROC(aes_xts_decrypt)
+
+ /*
+ * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
+ * int blocks, u8 dg[], int enc_before, int enc_after)
+ */
+AES_ENTRY(aes_mac_update)
+ ld1 {v0.16b}, [x4] /* get dg */
+ enc_prepare w2, x1, x7
+ cbnz w5, .Lmacenc
+
+.Lmacloop:
+ cbz w3, .Lmacout
+ ld1 {v1.16b}, [x0], #16 /* get next pt block */
+ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
+
+ subs w3, w3, #1
+ csinv x5, x6, xzr, eq
+ cbz w5, .Lmacout
+
+.Lmacenc:
+ encrypt_block v0, w2, x1, x7, w8
+ b .Lmacloop
+
+.Lmacout:
+ st1 {v0.16b}, [x4] /* return dg */
+ ret
+AES_ENDPROC(aes_mac_update)
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 85f07ead7c5c..f1e3aa2732f9 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -1,7 +1,7 @@
/*
* linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -17,17 +17,25 @@
/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
sshr \temp, \in, #7
- add \out, \in, \in
+ shl \out, \in, #1
and \temp, \temp, \const
eor \out, \out, \temp
.endm
+ /* multiply by polynomial 'x^2' in GF(2^8) */
+ .macro mul_by_x2, out, in, temp, const
+ ushr \temp, \in, #6
+ shl \out, \in, #2
+ pmul \temp, \temp, \const
+ eor \out, \out, \temp
+ .endm
+
/* preload the entire Sbox */
.macro prepare, sbox, shiftrows, temp
adr \temp, \sbox
- movi v12.16b, #0x40
+ movi v12.16b, #0x1b
ldr q13, \shiftrows
- movi v14.16b, #0x1b
+ ldr q14, .Lror32by8
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
@@ -50,37 +58,31 @@
/* apply SubBytes transformation using the the preloaded Sbox */
.macro sub_bytes, in
- sub v9.16b, \in\().16b, v12.16b
+ sub v9.16b, \in\().16b, v15.16b
tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
- sub v10.16b, v9.16b, v12.16b
+ sub v10.16b, v9.16b, v15.16b
tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
- sub v11.16b, v10.16b, v12.16b
+ sub v11.16b, v10.16b, v15.16b
tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
.endm
/* apply MixColumns transformation */
- .macro mix_columns, in
- mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
- rev32 v8.8h, \in\().8h
- eor \in\().16b, v10.16b, \in\().16b
- shl v9.4s, v8.4s, #24
- shl v11.4s, \in\().4s, #24
- sri v9.4s, v8.4s, #8
- sri v11.4s, \in\().4s, #8
- eor v9.16b, v9.16b, v8.16b
- eor v10.16b, v10.16b, v9.16b
- eor \in\().16b, v10.16b, v11.16b
- .endm
-
+ .macro mix_columns, in, enc
+ .if \enc == 0
/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
- .macro inv_mix_columns, in
- mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
- mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
- eor \in\().16b, \in\().16b, v11.16b
- rev32 v11.8h, v11.8h
- eor \in\().16b, \in\().16b, v11.16b
- mix_columns \in
+ mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
+ eor \in\().16b, \in\().16b, v8.16b
+ rev32 v8.8h, v8.8h
+ eor \in\().16b, \in\().16b, v8.16b
+ .endif
+
+ mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
+ rev32 v8.8h, \in\().8h
+ eor v8.16b, v8.16b, v9.16b
+ eor \in\().16b, \in\().16b, v8.16b
+ tbl \in\().16b, {\in\().16b}, v14.16b
+ eor \in\().16b, \in\().16b, v8.16b
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
@@ -88,16 +90,13 @@
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
+ movi v15.16b, #0x40
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
- ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
+ ld1 {v15.4s}, [\rkp], #16
beq 2222f
- .if \enc == 1
- mix_columns \in
- .else
- inv_mix_columns \in
- .endif
+ mix_columns \in, \enc
b 1111b
2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
.endm
@@ -116,139 +115,114 @@
*/
.macro sub_bytes_2x, in0, in1
- sub v8.16b, \in0\().16b, v12.16b
- sub v9.16b, \in1\().16b, v12.16b
+ sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
+ sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, v8.16b, v12.16b
- sub v11.16b, v9.16b, v12.16b
+ sub v10.16b, v8.16b, v15.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
+ sub v11.16b, v9.16b, v15.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v10.16b, v12.16b
- sub v9.16b, v11.16b, v12.16b
+ sub v8.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
+ sub v9.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm
.macro sub_bytes_4x, in0, in1, in2, in3
- sub v8.16b, \in0\().16b, v12.16b
+ sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
- sub v9.16b, \in1\().16b, v12.16b
+ sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
- sub v10.16b, \in2\().16b, v12.16b
+ sub v10.16b, \in2\().16b, v15.16b
tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
- sub v11.16b, \in3\().16b, v12.16b
+ sub v11.16b, \in3\().16b, v15.16b
tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
- sub v8.16b, v8.16b, v12.16b
+ sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
- sub v9.16b, v9.16b, v12.16b
+ sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
- sub v10.16b, v10.16b, v12.16b
+ sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
- sub v11.16b, v11.16b, v12.16b
+ sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
- sub v8.16b, v8.16b, v12.16b
+ sub v8.16b, v8.16b, v15.16b
tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
- sub v9.16b, v9.16b, v12.16b
+ sub v9.16b, v9.16b, v15.16b
tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
- sub v10.16b, v10.16b, v12.16b
+ sub v10.16b, v10.16b, v15.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
- sub v11.16b, v11.16b, v12.16b
+ sub v11.16b, v11.16b, v15.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
.endm
.macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
- sshr \tmp0\().16b, \in0\().16b, #7
- add \out0\().16b, \in0\().16b, \in0\().16b
- sshr \tmp1\().16b, \in1\().16b, #7
+ sshr \tmp0\().16b, \in0\().16b, #7
+ shl \out0\().16b, \in0\().16b, #1
+ sshr \tmp1\().16b, \in1\().16b, #7
and \tmp0\().16b, \tmp0\().16b, \const\().16b
- add \out1\().16b, \in1\().16b, \in1\().16b
+ shl \out1\().16b, \in1\().16b, #1
and \tmp1\().16b, \tmp1\().16b, \const\().16b
eor \out0\().16b, \out0\().16b, \tmp0\().16b
eor \out1\().16b, \out1\().16b, \tmp1\().16b
.endm
- .macro mix_columns_2x, in0, in1
- mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
- rev32 v10.8h, \in0\().8h
- rev32 v11.8h, \in1\().8h
- eor \in0\().16b, v8.16b, \in0\().16b
- eor \in1\().16b, v9.16b, \in1\().16b
- shl v12.4s, v10.4s, #24
- shl v13.4s, v11.4s, #24
- eor v8.16b, v8.16b, v1