summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 10:40:14 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-08-03 10:40:14 -0700
commitab5c60b79ab6cc50b39bbb21b2f9fb55af900b84 (patch)
tree71fa895fbf01e3b88f26cf257d9105f9d286b631 /arch
parent5577416c39652d395a6045677f4f598564aba1cf (diff)
parent3cbfe80737c18ac6e635421ab676716a393d3074 (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu: "API: - Add support for allocating transforms on a specific NUMA Node - Introduce the flag CRYPTO_ALG_ALLOCATES_MEMORY for storage users Algorithms: - Drop PMULL based ghash on arm64 - Fixes for building with clang on x86 - Add sha256 helper that does the digest in one go - Add SP800-56A rev 3 validation checks to dh Drivers: - Permit users to specify NUMA node in hisilicon/zip - Add support for i.MX6 in imx-rngc - Add sa2ul crypto driver - Add BA431 hwrng driver - Add Ingenic JZ4780 and X1000 hwrng driver - Spread IRQ affinity in inside-secure and marvell/cesa" * 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (157 commits) crypto: sa2ul - Fix inconsistent IS_ERR and PTR_ERR hwrng: core - remove redundant initialization of variable ret crypto: x86/curve25519 - Remove unused carry variables crypto: ingenic - Add hardware RNG for Ingenic JZ4780 and X1000 dt-bindings: RNG: Add Ingenic RNG bindings. crypto: caam/qi2 - add module alias crypto: caam - add more RNG hw error codes crypto: caam/jr - remove incorrect reference to caam_jr_register() crypto: caam - silence .setkey in case of bad key length crypto: caam/qi2 - create ahash shared descriptors only once crypto: caam/qi2 - fix error reporting for caam_hash_alloc crypto: caam - remove deadcode on 32-bit platforms crypto: ccp - use generic power management crypto: xts - Replace memcpy() invocation with simple assignment crypto: marvell/cesa - irq balance crypto: inside-secure - irq balance crypto: ecc - SP800-56A rev 3 local public key validation crypto: dh - SP800-56A rev 3 local public key validation crypto: dh - check validity of Z before export lib/mpi: Add mpi_sub_ui() ...
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/crypto/crc32-ce-core.S2
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c51
-rw-r--r--arch/arm/crypto/sha1-armv4-large.S2
-rw-r--r--arch/arm/crypto/sha256-armv4.pl2
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2
-rw-r--r--arch/arm/crypto/sha512-armv4.pl4
-rw-r--r--arch/arm/crypto/sha512-core.S_shipped4
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c257
-rw-r--r--arch/sparc/crypto/sha256_glue.c14
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S15
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S739
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S1
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S16
-rw-r--r--arch/x86/crypto/chacha_glue.c17
-rw-r--r--arch/x86/crypto/crc32-pclmul_asm.S47
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S7
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c6
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S17
-rw-r--r--arch/x86/include/asm/inst.h163
19 files changed, 551 insertions, 815 deletions
diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/crypto/crc32-ce-core.S
index 5cbd4a6fedad..3f13a76b9066 100644
--- a/arch/arm/crypto/crc32-ce-core.S
+++ b/arch/arm/crypto/crc32-ce-core.S
@@ -39,7 +39,7 @@
* CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
* PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
* at:
- * http://www.intel.com/products/processor/manuals/
+ * https://www.intel.com/products/processor/manuals/
* Intel(R) 64 and IA-32 Architectures Software Developer's Manual
* Volume 2B: Instruction Set Reference, N-Z
*
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index a00fd329255f..f13401f3e669 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -16,6 +16,7 @@
#include <crypto/gf128mul.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/module.h>
MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
@@ -27,12 +28,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GHASH_DIGEST_SIZE 16
struct ghash_key {
- u64 h[2];
- u64 h2[2];
- u64 h3[2];
- u64 h4[2];
-
be128 k;
+ u64 h[][2];
};
struct ghash_desc_ctx {
@@ -46,16 +43,12 @@ struct ghash_async_ctx {
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
-static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_p64);
static int ghash_init(struct shash_desc *desc)
{
@@ -70,7 +63,10 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
{
if (likely(crypto_simd_usable())) {
kernel_neon_begin();
- pmull_ghash_update(blocks, dg, src, key, head);
+ if (static_branch_likely(&use_p64))
+ pmull_ghash_update_p64(blocks, dg, src, key->h, head);
+ else
+ pmull_ghash_update_p8(blocks, dg, src, key->h, head);
kernel_neon_end();
} else {
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
@@ -161,25 +157,26 @@ static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
struct ghash_key *key = crypto_shash_ctx(tfm);
- be128 h;
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
- ghash_reflect(key->h, &key->k);
+ ghash_reflect(key->h[0], &key->k);
- h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h2, &h);
+ if (static_branch_likely(&use_p64)) {
+ be128 h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h3, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[1], &h);
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h4, &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[2], &h);
+ gf128mul_lle(&h, &key->k);
+ ghash_reflect(key->h[3], &h);
+ }
return 0;
}
@@ -195,7 +192,7 @@ static struct shash_alg ghash_alg = {
.base.cra_driver_name = "ghash-ce-sync",
.base.cra_priority = 300 - 1,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
@@ -354,10 +351,10 @@ static int __init ghash_ce_mod_init(void)
if (!(elf_hwcap & HWCAP_NEON))
return -ENODEV;
- if (elf_hwcap2 & HWCAP2_PMULL)
- pmull_ghash_update = pmull_ghash_update_p64;
- else
- pmull_ghash_update = pmull_ghash_update_p8;
+ if (elf_hwcap2 & HWCAP2_PMULL) {
+ ghash_alg.base.cra_ctxsize += 3 * sizeof(u64[2]);
+ static_branch_enable(&use_p64);
+ }
err = crypto_register_shash(&ghash_alg);
if (err)
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index f82cd8cf5a09..1c8b685149f2 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -13,7 +13,7 @@
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
@ ====================================================================
@ sha1_block procedure for ARMv4.
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
index a03cf4dfb781..9f96ff48e4a8 100644
--- a/arch/arm/crypto/sha256-armv4.pl
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -13,7 +13,7 @@
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
index 054aae0edfce..ea04b2ab0c33 100644
--- a/arch/arm/crypto/sha256-core.S_shipped
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -12,7 +12,7 @@
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
@ ====================================================================
@ SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl
index 788c17b56ecc..69df68981acd 100644
--- a/arch/arm/crypto/sha512-armv4.pl
+++ b/arch/arm/crypto/sha512-armv4.pl
@@ -13,7 +13,7 @@
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.
@@ -43,7 +43,7 @@
# terms it's 22.6 cycles per byte, which is disappointing result.
# Technical writers asserted that 3-way S4 pipeline can sustain
# multiple NEON instructions per cycle, but dual NEON issue could
-# not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+# not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
# for further details. On side note Cortex-A15 processes one byte in
# 16 cycles.
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
index 710ea309769e..cb147db5cbfe 100644
--- a/arch/arm/crypto/sha512-core.S_shipped
+++ b/arch/arm/crypto/sha512-core.S_shipped
@@ -12,7 +12,7 @@
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see http://www.openssl.org/~appro/cryptogams/.
+@ details see https://www.openssl.org/~appro/cryptogams/.
@ ====================================================================
@ SHA512 block procedure for ARMv4. September 2007.
@@ -42,7 +42,7 @@
@ terms it's 22.6 cycles per byte, which is disappointing result.
@ Technical writers asserted that 3-way S4 pipeline can sustain
@ multiple NEON instructions per cycle, but dual NEON issue could
-@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
+@ not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
@ for further details. On side note Cortex-A15 processes one byte in
@ 16 cycles.
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 22831d3b7f62..da1034867aaa 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -31,12 +31,8 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GCM_IV_SIZE 12
struct ghash_key {
- u64 h[2];
- u64 h2[2];
- u64 h3[2];
- u64 h4[2];
-
be128 k;
+ u64 h[][2];
};
struct ghash_desc_ctx {
@@ -51,22 +47,18 @@ struct gcm_aes_ctx {
};
asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
- struct ghash_key const *k,
- const char *head);
+ u64 const h[][2], const char *head);
asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
- struct ghash_key const *k, u64 dg[],
- u8 ctr[], u32 const rk[], int rounds,
- u8 tag[]);
+ u64 const h[][2], u64 dg[], u8 ctr[],
+ u32 const rk[], int rounds, u8 tag[]);
asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
- struct ghash_key const *k, u64 dg[],
- u8 ctr[], u32 const rk[], int rounds,
- u8 tag[]);
+ u64 const h[][2], u64 dg[], u8 ctr[],
+ u32 const rk[], int rounds, u8 tag[]);
static int ghash_init(struct shash_desc *desc)
{
@@ -77,48 +69,51 @@ static int ghash_init(struct shash_desc *desc)
}
static void ghash_do_update(int blocks, u64 dg[], const char *src,
- struct ghash_key *key, const char *head,
- void (*simd_update)(int blocks, u64 dg[],
- const char *src,
- struct ghash_key const *k,
- const char *head))
+ struct ghash_key *key, const char *head)
{
- if (likely(crypto_simd_usable() && simd_update)) {
- kernel_neon_begin();
- simd_update(blocks, dg, src, key, head);
- kernel_neon_end();
- } else {
- be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+ be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
- do {
- const u8 *in = src;
-
- if (head) {
- in = head;
- blocks++;
- head = NULL;
- } else {
- src += GHASH_BLOCK_SIZE;
- }
+ do {
+ const u8 *in = src;
+
+ if (head) {
+ in = head;
+ blocks++;
+ head = NULL;
+ } else {
+ src += GHASH_BLOCK_SIZE;
+ }
- crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
- gf128mul_lle(&dst, &key->k);
- } while (--blocks);
+ crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+ gf128mul_lle(&dst, &key->k);
+ } while (--blocks);
- dg[0] = be64_to_cpu(dst.b);
- dg[1] = be64_to_cpu(dst.a);
+ dg[0] = be64_to_cpu(dst.b);
+ dg[1] = be64_to_cpu(dst.a);
+}
+
+static __always_inline
+void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
+ struct ghash_key *key, const char *head,
+ void (*simd_update)(int blocks, u64 dg[],
+ const char *src,
+ u64 const h[][2],
+ const char *head))
+{
+ if (likely(crypto_simd_usable())) {
+ kernel_neon_begin();
+ simd_update(blocks, dg, src, key->h, head);
+ kernel_neon_end();
+ } else {
+ ghash_do_update(blocks, dg, src, key, head);
}
}
/* avoid hogging the CPU for too long */
#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE)
-static int __ghash_update(struct shash_desc *desc, const u8 *src,
- unsigned int len,
- void (*simd_update)(int blocks, u64 dg[],
- const char *src,
- struct ghash_key const *k,
- const char *head))
+static int ghash_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -143,9 +138,9 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
do {
int chunk = min(blocks, MAX_BLOCKS);
- ghash_do_update(chunk, ctx->digest, src, key,
- partial ? ctx->buf : NULL,
- simd_update);
+ ghash_do_simd_update(chunk, ctx->digest, src, key,
+ partial ? ctx->buf : NULL,
+ pmull_ghash_update_p8);
blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
@@ -157,39 +152,7 @@ static int __ghash_update(struct shash_desc *desc, const u8 *src,
return 0;
}
-static int ghash_update_p8(struct shash_desc *desc, const u8 *src,
- unsigned int len)
-{
- return __ghash_update(desc, src, len, pmull_ghash_update_p8);
-}
-
-static int ghash_update_p64(struct shash_desc *desc, const u8 *src,
- unsigned int len)
-{
- return __ghash_update(desc, src, len, pmull_ghash_update_p64);
-}
-
-static int ghash_final_p8(struct shash_desc *desc, u8 *dst)
-{
- struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
- unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
-
- if (partial) {
- struct ghash_key *key = crypto_shash_ctx(desc->tfm);
-
- memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
-
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
- pmull_ghash_update_p8);
- }
- put_unaligned_be64(ctx->digest[1], dst);
- put_unaligned_be64(ctx->digest[0], dst + 8);
-
- *ctx = (struct ghash_desc_ctx){};
- return 0;
-}
-
-static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
+static int ghash_final(struct shash_desc *desc, u8 *dst)
{
struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
unsigned int partial = ctx->count % GHASH_BLOCK_SIZE;
@@ -199,8 +162,8 @@ static int ghash_final_p64(struct shash_desc *desc, u8 *dst)
memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
- ghash_do_update(1, ctx->digest, ctx->buf, key, NULL,
- pmull_ghash_update_p64);
+ ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
+ pmull_ghash_update_p8);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -220,29 +183,6 @@ static void ghash_reflect(u64 h[], const be128 *k)
h[1] ^= 0xc200000000000000UL;
}
-static int __ghash_setkey(struct ghash_key *key,
- const u8 *inkey, unsigned int keylen)
-{
- be128 h;
-
- /* needed for the fallback */
- memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
-
- ghash_reflect(key->h, &key->k);
-
- h = key->k;
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h2, &h);
-
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h3, &h);
-
- gf128mul_lle(&h, &key->k);
- ghash_reflect(key->h4, &h);
-
- return 0;
-}
-
static int ghash_setkey(struct crypto_shash *tfm,
const u8 *inkey, unsigned int keylen)
{
@@ -251,38 +191,28 @@ static int ghash_setkey(struct crypto_shash *tfm,
if (keylen != GHASH_BLOCK_SIZE)
return -EINVAL;
- return __ghash_setkey(key, inkey, keylen);
+ /* needed for the fallback */
+ memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+
+ ghash_reflect(key->h[0], &key->k);
+ return 0;
}
-static struct shash_alg ghash_alg[] = {{
+static struct shash_alg ghash_alg = {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-neon",
.base.cra_priority = 150,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
- .base.cra_module = THIS_MODULE,
-
- .digestsize = GHASH_DIGEST_SIZE,
- .init = ghash_init,
- .update = ghash_update_p8,
- .final = ghash_final_p8,
- .setkey = ghash_setkey,
- .descsize = sizeof(struct ghash_desc_ctx),
-}, {
- .base.cra_name = "ghash",
- .base.cra_driver_name = "ghash-ce",
- .base.cra_priority = 200,
- .base.cra_blocksize = GHASH_BLOCK_SIZE,
- .base.cra_ctxsize = sizeof(struct ghash_key),
+ .base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
.digestsize = GHASH_DIGEST_SIZE,
.init = ghash_init,
- .update = ghash_update_p64,
- .final = ghash_final_p64,
+ .update = ghash_update,
+ .final = ghash_final,
.setkey = ghash_setkey,
.descsize = sizeof(struct ghash_desc_ctx),
-}};
+};
static int num_rounds(struct crypto_aes_ctx *ctx)
{
@@ -301,6 +231,7 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
u8 key[GHASH_BLOCK_SIZE];
+ be128 h;
int ret;
ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
@@ -309,7 +240,22 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
- return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
+ /* needed for the fallback */
+ memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);
+
+ ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);
+
+ h = ctx->ghash_key.k;
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h[1], &h);
+
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h[2], &h);
+
+ gf128mul_lle(&h, &ctx->ghash_key.k);
+ ghash_reflect(ctx->ghash_key.h[3], &h);
+
+ return 0;
}
static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
@@ -341,9 +287,9 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
int blocks = count / GHASH_BLOCK_SIZE;
- ghash_do_update(blocks, dg, src, &ctx->ghash_key,
- *buf_count ? buf : NULL,
- pmull_ghash_update_p64);
+ ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
+ *buf_count ? buf : NULL,
+ pmull_ghash_update_p64);
src += blocks * GHASH_BLOCK_SIZE;
count %= GHASH_BLOCK_SIZE;
@@ -387,8 +333,8 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
if (buf_count) {
memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
- ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
+ ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
+ pmull_ghash_update_p64);
}
}
@@ -433,8 +379,8 @@ static int gcm_encrypt(struct aead_request *req)
}
kernel_neon_begin();
- pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
- iv, ctx->aes_key.key_enc, nrounds,
+ pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();
@@ -464,7 +410,7 @@ static int gcm_encrypt(struct aead_request *req)
} while (--remaining > 0);
ghash_do_update(blocks, dg, walk.dst.virt.addr,
- &ctx->ghash_key, NULL, NULL);
+ &ctx->ghash_key, NULL);
err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
@@ -483,7 +429,7 @@ static int gcm_encrypt(struct aead_request *req)
tag = (u8 *)&lengths;
ghash_do_update(1, dg, tag, &ctx->ghash_key,
- walk.nbytes ? buf : NULL, NULL);
+ walk.nbytes ? buf : NULL);
if (walk.nbytes)
err = skcipher_walk_done(&walk, 0);
@@ -547,8 +493,8 @@ static int gcm_decrypt(struct aead_request *req)
}
kernel_neon_begin();
- pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
- iv, ctx->aes_key.key_enc, nrounds,
+ pmull_gcm_decrypt(nbytes, dst, src, ctx->ghash_key.h,
+ dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();
@@ -568,7 +514,7 @@ static int gcm_decrypt(struct aead_request *req)
u8 *dst = walk.dst.virt.addr;
ghash_do_update(blocks, dg, walk.src.virt.addr,
- &ctx->ghash_key, NULL, NULL);
+ &ctx->ghash_key, NULL);
do {
aes_encrypt(&ctx->aes_key, buf, iv);
@@ -591,7 +537,7 @@ static int gcm_decrypt(struct aead_request *req)
tag = (u8 *)&lengths;
ghash_do_update(1, dg, tag, &ctx->ghash_key,
- walk.nbytes ? buf : NULL, NULL);
+ walk.nbytes ? buf : NULL);
if (walk.nbytes) {
aes_encrypt(&ctx->aes_key, buf, iv);
@@ -635,43 +581,28 @@ static struct aead_alg gcm_aes_alg = {
.base.cra_driver_name = "gcm-aes-ce",
.base.cra_priority = 300,
.base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct gcm_aes_ctx),
+ .base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
+ 4 * sizeof(u64[2]),
.base.cra_module = THIS_MODULE,
};
static int __init ghash_ce_mod_init(void)
{
- int ret;
-
if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
if (cpu_have_named_feature(PMULL))
- ret = crypto_register_shashes(ghash_alg,
- ARRAY_SIZE(ghash_alg));
- else
- /* only register the first array element */
- ret = crypto_register_shash(ghash_alg);
+ return crypto_register_aead(&gcm_aes_alg);
- if (ret)
- return ret;
-
- if (cpu_have_named_feature(PMULL)) {
- ret = crypto_register_aead(&gcm_aes_alg);
- if (ret)
- crypto_unregister_shashes(ghash_alg,
- ARRAY_SIZE(ghash_alg));
- }
- return ret;
+ return crypto_register_shash(&ghash_alg);
}
static void __exit ghash_ce_mod_exit(void)
{
if (cpu_have_named_feature(PMULL))
- crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
+ crypto_unregister_aead(&gcm_aes_alg);
else
- crypto_unregister_shash(ghash_alg);
- crypto_unregister_aead(&gcm_aes_alg);
+ crypto_unregister_shash(&ghash_alg);
}
static const struct cpu_feature ghash_cpu_feature[] = {
diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c
index 286bc8ecf15b..ca2547df9652 100644
--- a/arch/sparc/crypto/sha256_glue.c
+++ b/arch/sparc/crypto/sha256_glue.c
@@ -156,7 +156,7 @@ static int sha256_sparc64_import(struct shash_desc *desc, const void *in)
return 0;
}
-static struct shash_alg sha256 = {
+static struct shash_alg sha256_alg = {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_sparc64_init,
.update = sha256_sparc64_update,
@@ -174,7 +174,7 @@ static struct shash_alg sha256 = {
}
};
-static struct shash_alg sha224 = {
+static struct shash_alg sha224_alg = {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_sparc64_init,
.update = sha256_sparc64_update,
@@ -206,13 +206,13 @@ static bool __init sparc64_has_sha256_opcode(void)
static int __init sha256_sparc64_mod_init(void)
{
if (sparc64_has_sha256_opcode()) {
- int ret = crypto_register_shash(&sha224);
+ int ret = crypto_register_shash(&sha224_alg);
if (ret < 0)
return ret;
- ret = crypto_register_shash(&sha256);
+ ret = crypto_register_shash(&sha256_alg);
if (ret < 0) {
- crypto_unregister_shash(&sha224);
+ crypto_unregister_shash(&sha224_alg);
return ret;
}
@@ -225,8 +225,8 @@ static int __init sha256_sparc64_mod_init(void)
static void __exit sha256_sparc64_mod_fini(void)
{
- crypto_unregister_shash(&sha224);
- crypto_unregister_shash(&sha256);
+ crypto_unregister_shash(&sha224_alg);
+ crypto_unregister_shash(&sha256_alg);
}
module_init(sha256_sparc64_mod_init);
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index ec437db1fa54..3f0fc7dd87d7 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -63,7 +63,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
#define VMOVDQ vmovdqu
@@ -127,10 +126,6 @@ ddq_add_8:
/* generate a unique variable for ddq_add_x */
-.macro setddq n
- var_ddq_add = ddq_add_\n
-.endm
-
/* generate a unique variable for xmm register */
.macro setxdata n
var_xdata = %xmm\n
@@ -140,9 +135,7 @@ ddq_add_8:
.macro club name, id
.altmacro
- .if \name == DDQ_DATA
- setddq %\id
- .elseif \name == XDATA
+ .if \name == XDATA
setxdata %\id
.endif
.noaltmacro
@@ -165,9 +158,8 @@ ddq_add_8:
.set i, 1
.rept (by - 1)
- club DDQ_DATA, i
club XDATA, i
- vpaddq var_ddq_add(%rip), xcounter, var_xdata
+ vpaddq (ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
vptest ddq_low_msk(%rip), var_xdata
jnz 1f
vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata
@@ -180,8 +172,7 @@ ddq_add_8:
vmovdqa 1*16(p_keys), xkeyA
vpxor xkey0, xdata0, xdata0
- club DDQ_DATA, by
- vpaddq var_ddq_add(%rip), xcounter, xcounter
+ vpaddq (ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
vptest ddq_low_msk(%rip), xcounter
jnz 1f
vpaddq ddq_high_add_1(%rip), xcounter, xcounter
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 54e7d15dbd0d..1852b19a73a0 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -26,7 +26,6 @@
*/
#include <linux/linkage.h>
-#include <asm/inst.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>
@@ -201,7 +200,7 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
mov \SUBKEY, %r12
movdqu (%r12), \TMP3
movdqa SHUF_MASK(%rip), \TMP2
- PSHUFB_XMM \TMP2, \TMP3
+ pshufb \TMP2, \TMP3
# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
@@ -263,10 +262,10 @@ ALL_F: .octa 0xffffffffffffffffffffffffffffffff
movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
movdqa SHUF_MASK(%rip), %xmm2
- PSHUFB_XMM %xmm2, %xmm0
+ pshufb %xmm2, %xmm0
movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
- PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
movdqu HashKey(%arg2), %xmm13
CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
@@ -347,7 +346,7 @@ _zero_cipher_left_\@:
paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
movdqu %xmm0, CurCount(%arg2)
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm0
+ pshufb %xmm10, %xmm0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
movdqu %xmm0, PBlockEncKey(%arg2)
@@ -377,7 +376,7 @@ _large_enough_update_\@:
# get the appropriate shuffle mask
movdqu (%r12), %xmm2
# shift right 16-r13 bytes
- PSHUFB_XMM %xmm2, %xmm1
+ pshufb %xmm2, %xmm1
_data_read_\@:
lea ALL_F+16(%rip), %r12
@@ -393,12 +392,12 @@ _data_read_\@:
.ifc \operation, dec
pand %xmm1, %xmm2
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10 ,%xmm2
+ pshufb %xmm10 ,%xmm2
pxor %xmm2, %xmm8
.else
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10,%xmm0
+ pshufb %xmm10,%xmm0
pxor %xmm0, %xmm8
.endif
@@ -408,17 +407,17 @@ _data_read_\@:
# GHASH computation for the last <16 byte block
movdqa SHUF_MASK(%rip), %xmm10
# shuffle xmm0 back to output as ciphertext
- PSHUFB_XMM %xmm10, %xmm0
+ pshufb %xmm10, %xmm0
.endif
# Output %r13 bytes
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
cmp $8, %r13
jle _less_than_8_bytes_left_\@
mov %rax, (%arg3 , %r11, 1)
add $8, %r11
psrldq $8, %xmm0
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
sub $8, %r13
_less_than_8_bytes_left_\@:
mov %al, (%arg3, %r11, 1)
@@ -449,7 +448,7 @@ _partial_done\@:
movd %r12d, %xmm15 # len(A) in %xmm15
mov InLen(%arg2), %r12
shl $3, %r12 # len(C) in bits (*128)
- MOVQ_R64_XMM %r12, %xmm1
+ movq %r12, %xmm1
pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
@@ -457,7 +456,7 @@ _partial_done\@:
GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
# final GHASH computation
movdqa SHUF_MASK(%rip), %xmm10
- PSHUFB_XMM %xmm10, %xmm8
+ pshufb %xmm10, %xmm8
movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
@@ -470,7 +469,7 @@ _return_T_\@:
cmp $8, %r11
jl _T_4_\@
_T_8_\@:
- MOVQ_R64_XMM %xmm0, %rax
+ movq %xmm0, %rax
mov %rax, (%r10)
add $8, %r10
sub $8, %r11
@@ -518,9 +517,9 @@ _return_T_done_\@:
pshufd $78, \HK, \TMP3
pxor \GH, \TMP2 # TMP2 = a1+a0
pxor \HK, \TMP3 # TMP3 = b1+b0
- PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1
- PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0
- PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
+ p