author     Linus Torvalds <torvalds@linux-foundation.org>  2018-06-05 15:51:21 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-05 15:51:21 -0700
commit     3e1a29b3bf66c2850ea8eba78c59c234921c0b69 (patch)
tree       641a5428e3a1ef205fafede3d6a03dae85d30e92 /arch
parent     fd59ccc53062964007beda8787ffd9cd93968d63 (diff)
parent     b268b3506d9910ca8238e92cb1dc51340574b2f2 (diff)
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6
Pull crypto updates from Herbert Xu:
 "API:
   - Decryption test vectors are now automatically generated from
     encryption test vectors.

  Algorithms:
   - Fix unaligned access issues in crc32/crc32c.
   - Add zstd compression algorithm.
   - Add AEGIS.
   - Add MORUS.

  Drivers:
   - Add accelerated AEGIS/MORUS on x86.
   - Add accelerated SM4 on arm64.
   - Removed x86 assembly salsa implementation as it is slower than C.
   - Add authenc(hmac(sha*), cbc(aes)) support in inside-secure.
   - Add ctr(aes) support in crypto4xx.
   - Add hardware key support in ccree.
   - Add support for new Centaur CPU in via-rng"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (112 commits)
  crypto: chtls - free beyond end rspq_skb_cache
  crypto: chtls - kbuild warnings
  crypto: chtls - dereference null variable
  crypto: chtls - wait for memory sendmsg, sendpage
  crypto: chtls - key len correction
  crypto: salsa20 - Revert "crypto: salsa20 - export generic helpers"
  crypto: x86/salsa20 - remove x86 salsa20 implementations
  crypto: ccp - Add GET_ID SEV command
  crypto: ccp - Add DOWNLOAD_FIRMWARE SEV command
  crypto: qat - Add MODULE_FIRMWARE for all qat drivers
  crypto: ccree - silence debug prints
  crypto: ccree - better clock handling
  crypto: ccree - correct host regs offset
  crypto: chelsio - Remove separate buffer used for DMA map B0 block in CCM
  crypt: chelsio - Send IV as Immediate for cipher algo
  crypto: chelsio - Return -ENOSPC for transient busy indication.
  crypto: caam/qi - fix warning in init_cgr()
  crypto: caam - fix rfc4543 descriptors
  crypto: caam - fix MC firmware detection
  crypto: clarify licensing of OpenSSL asm code
  ...
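The new AEAD algorithms are registered as ordinary kernel crypto API algorithms, so callers reach them by name through crypto_alloc_aead() and the API selects the highest-priority implementation (e.g. the accelerated x86 AEGIS driver added in this merge, when available). A minimal sketch, assuming a kernel-module context; the try_aegis128() helper is illustrative and not part of this merge:

    #include <crypto/aead.h>
    #include <linux/err.h>
    #include <linux/printk.h>

    /* Hypothetical probe for the newly added "aegis128" AEAD. */
    static int try_aegis128(void)
    {
            struct crypto_aead *tfm;

            /* Name-based lookup; prefers the AESNI driver over the
             * generic C implementation when both are registered. */
            tfm = crypto_alloc_aead("aegis128", 0, 0);
            if (IS_ERR(tfm))
                    return PTR_ERR(tfm); /* e.g. -ENOENT if not built */

            pr_info("aegis128 provided by %s\n",
                    crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm)));

            crypto_free_aead(tfm);
            return 0;
    }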
Diffstat (limited to 'arch')
-rw-r--r--  arch/arm/crypto/sha1-armv4-large.S | 10
-rw-r--r--  arch/arm/crypto/sha256-armv4.pl | 11
-rw-r--r--  arch/arm/crypto/sha256-core.S_shipped | 11
-rw-r--r--  arch/arm/crypto/sha512-armv4.pl | 11
-rw-r--r--  arch/arm/crypto/sha512-core.S_shipped | 11
-rw-r--r--  arch/arm64/crypto/Kconfig | 6
-rw-r--r--  arch/arm64/crypto/Makefile | 3
-rw-r--r--  arch/arm64/crypto/aes-ce-ccm-core.S | 150
-rw-r--r--  arch/arm64/crypto/aes-ce.S | 15
-rw-r--r--  arch/arm64/crypto/aes-modes.S | 331
-rw-r--r--  arch/arm64/crypto/aes-neonbs-core.S | 305
-rw-r--r--  arch/arm64/crypto/crc32-ce-core.S | 40
-rw-r--r--  arch/arm64/crypto/crct10dif-ce-core.S | 32
-rw-r--r--  arch/arm64/crypto/ghash-ce-core.S | 113
-rw-r--r--  arch/arm64/crypto/ghash-ce-glue.c | 28
-rw-r--r--  arch/arm64/crypto/sha1-ce-core.S | 42
-rw-r--r--  arch/arm64/crypto/sha2-ce-core.S | 37
-rw-r--r--  arch/arm64/crypto/sha256-core.S_shipped | 12
-rw-r--r--  arch/arm64/crypto/sha3-ce-core.S | 77
-rw-r--r--  arch/arm64/crypto/sha512-armv8.pl | 12
-rw-r--r--  arch/arm64/crypto/sha512-ce-core.S | 27
-rw-r--r--  arch/arm64/crypto/sha512-core.S_shipped | 12
-rw-r--r--  arch/arm64/crypto/sm4-ce-core.S | 36
-rw-r--r--  arch/arm64/crypto/sm4-ce-glue.c | 73
-rw-r--r--  arch/x86/crypto/Makefile | 25
-rw-r--r--  arch/x86/crypto/aegis128-aesni-asm.S | 749
-rw-r--r--  arch/x86/crypto/aegis128-aesni-glue.c | 407
-rw-r--r--  arch/x86/crypto/aegis128l-aesni-asm.S | 825
-rw-r--r--  arch/x86/crypto/aegis128l-aesni-glue.c | 407
-rw-r--r--  arch/x86/crypto/aegis256-aesni-asm.S | 702
-rw-r--r--  arch/x86/crypto/aegis256-aesni-glue.c | 407
-rw-r--r--  arch/x86/crypto/ghash-clmulni-intel_glue.c | 2
-rw-r--r--  arch/x86/crypto/morus1280-avx2-asm.S | 621
-rw-r--r--  arch/x86/crypto/morus1280-avx2-glue.c | 68
-rw-r--r--  arch/x86/crypto/morus1280-sse2-asm.S | 895
-rw-r--r--  arch/x86/crypto/morus1280-sse2-glue.c | 68
-rw-r--r--  arch/x86/crypto/morus1280_glue.c | 302
-rw-r--r--  arch/x86/crypto/morus640-sse2-asm.S | 614
-rw-r--r--  arch/x86/crypto/morus640-sse2-glue.c | 68
-rw-r--r--  arch/x86/crypto/morus640_glue.c | 298
-rw-r--r--  arch/x86/crypto/salsa20-i586-asm_32.S | 938
-rw-r--r--  arch/x86/crypto/salsa20-x86_64-asm_64.S | 805
-rw-r--r--  arch/x86/crypto/salsa20_glue.c | 91
43 files changed, 7409 insertions, 2288 deletions
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 99207c45ec10..f82cd8cf5a09 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -1,4 +1,14 @@
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
+
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl
index fac0533ea633..b9ec44060ed3 100644
--- a/arch/arm/crypto/sha256-armv4.pl
+++ b/arch/arm/crypto/sha256-armv4.pl
@@ -1,12 +1,19 @@
#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPL terms is granted.
# ====================================================================
# SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
index 555a1a8eec90..3b58300d611c 100644
--- a/arch/arm/crypto/sha256-core.S_shipped
+++ b/arch/arm/crypto/sha256-core.S_shipped
@@ -1,11 +1,18 @@
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
-@
-@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA256 block procedure for ARMv4. May 2007.
diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl
index a2b11a844357..fb5d15048c0b 100644
--- a/arch/arm/crypto/sha512-armv4.pl
+++ b/arch/arm/crypto/sha512-armv4.pl
@@ -1,12 +1,19 @@
#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+# This code is taken from the OpenSSL project but the author (Andy Polyakov)
+# has relicensed it under the GPLv2. Therefore this program is free software;
+# you can redistribute it and/or modify it under the terms of the GNU General
+# Public License version 2 as published by the Free Software Foundation.
+#
+# The original headers, including the original license headers, are
+# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
-#
-# Permission to use under GPL terms is granted.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
index 3694c4d4ca2b..b1c334a49cda 100644
--- a/arch/arm/crypto/sha512-core.S_shipped
+++ b/arch/arm/crypto/sha512-core.S_shipped
@@ -1,11 +1,18 @@
+@ SPDX-License-Identifier: GPL-2.0
+
+@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
+@ has relicensed it under the GPLv2. Therefore this program is free software;
+@ you can redistribute it and/or modify it under the terms of the GNU General
+@ Public License version 2 as published by the Free Software Foundation.
+@
+@ The original headers, including the original license headers, are
+@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
-@
-@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA512 block procedure for ARMv4. September 2007.
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index cb5a243110c4..e3fdb0fd6f70 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE
select CRYPTO_HASH
select CRYPTO_SM3
+config CRYPTO_SM4_ARM64_CE
+ tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_ALGAPI
+ select CRYPTO_SM4
+
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index f35ac684b1c0..bcafd016618e 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
+sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
+
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S
index e3a375c4cb83..88f5aef7934c 100644
--- a/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -19,24 +19,33 @@
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
- ldr w8, [x3] /* leftover from prev round? */
+ frame_push 7
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+ ldr w25, [x22] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */
- cbz w8, 1f
- sub w8, w8, #16
+ cbz w25, 1f
+ sub w25, w25, #16
eor v1.16b, v1.16b, v1.16b
-0: ldrb w7, [x1], #1 /* get 1 byte of input */
- subs w2, w2, #1
- add w8, w8, #1
+0: ldrb w7, [x20], #1 /* get 1 byte of input */
+ subs w21, w21, #1
+ add w25, w25, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
- cbnz w8, 0b
+ cbnz w25, 0b
eor v0.16b, v0.16b, v1.16b
-1: ld1 {v3.4s}, [x4] /* load first round key */
- prfm pldl1strm, [x1]
- cmp w5, #12 /* which key size? */
- add x6, x4, #16
- sub w7, w5, #2 /* modified # of rounds */
+1: ld1 {v3.4s}, [x23] /* load first round key */
+ prfm pldl1strm, [x20]
+ cmp w24, #12 /* which key size? */
+ add x6, x23, #16
+ sub w7, w24, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
@@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
- subs w2, w2, #16 /* last data? */
+ subs w21, w21, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
- ld1 {v1.16b}, [x1], #16 /* load next input block */
+ ld1 {v1.16b}, [x20], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
- bne 1b
-6: st1 {v0.16b}, [x0] /* store mac */
+ beq 6f
+
+ if_will_cond_yield_neon
+ st1 {v0.16b}, [x19] /* store mac */
+ do_cond_yield_neon
+ ld1 {v0.16b}, [x19] /* reload mac */
+ endif_yield_neon
+
+ b 1b
+6: st1 {v0.16b}, [x19] /* store mac */
beq 10f
- adds w2, w2, #16
+ adds w21, w21, #16
beq 10f
- mov w8, w2
-7: ldrb w7, [x1], #1
+ mov w25, w21
+7: ldrb w7, [x20], #1
umov w6, v0.b[0]
eor w6, w6, w7
- strb w6, [x0], #1
- subs w2, w2, #1
+ strb w6, [x19], #1
+ subs w21, w21, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
-8: mov w7, w8
- add w8, w8, #16
+8: mov w7, w25
+ add w25, w25, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
- st1 {v0.16b}, [x0]
-10: str w8, [x3]
+ st1 {v0.16b}, [x19]
+10: str w25, [x22]
+
+ frame_pop
ret
ENDPROC(ce_aes_ccm_auth_data)
@@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
- ldr x8, [x6, #8] /* load lower ctr */
- ld1 {v0.16b}, [x5] /* load mac */
-CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
+ frame_push 8
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+ mov x25, x6
+
+ ldr x26, [x25, #8] /* load lower ctr */
+ ld1 {v0.16b}, [x24] /* load mac */
+CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
0: /* outer loop */
- ld1 {v1.8b}, [x6] /* load upper ctr */
- prfm pldl1strm, [x1]
- add x8, x8, #1
- rev x9, x8
- cmp w4, #12 /* which key size? */
- sub w7, w4, #2 /* get modified # of rounds */
+ ld1 {v1.8b}, [x25] /* load upper ctr */
+ prfm pldl1strm, [x20]
+ add x26, x26, #1
+ rev x9, x26
+ cmp w23, #12 /* which key size? */
+ sub w7, w23, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
- ld1 {v3.4s}, [x3] /* load first round key */
- add x10, x3, #16
+ ld1 {v3.4s}, [x22] /* load first round key */
+ add x10, x22, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
@@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
- subs w2, w2, #16
- bmi 6f /* partial block? */
- ld1 {v2.16b}, [x1], #16 /* load next input block */
+ subs w21, w21, #16
+ bmi 7f /* partial block? */
+ ld1 {v2.16b}, [x20], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
- st1 {v1.16b}, [x0], #16 /* write output block */
- bne 0b
-CPU_LE( rev x8, x8 )
- st1 {v0.16b}, [x5] /* store mac */
- str x8, [x6, #8] /* store lsb end of ctr (BE) */
-5: ret
-
-6: eor v0.16b, v0.16b, v5.16b /* final round mac */
+ st1 {v1.16b}, [x19], #16 /* write output block */
+ beq 5f
+
+ if_will_cond_yield_neon
+ st1 {v0.16b}, [x24] /* store mac */
+ do_cond_yield_neon
+ ld1 {v0.16b}, [x24] /* reload mac */
+ endif_yield_neon
+
+ b 0b
+5:
+CPU_LE( rev x26, x26 )
+ st1 {v0.16b}, [x24] /* store mac */
+ str x26, [x25, #8] /* store lsb end of ctr (BE) */
+
+6: frame_pop
+ ret
+
+7: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.16b}, [x5] /* store mac */
- add w2, w2, #16 /* process partial tail block */
-7: ldrb w9, [x1], #1 /* get 1 byte of input */
+ st1 {v0.16b}, [x24] /* store mac */
+ add w21, w21, #16 /* process partial tail block */
+8: ldrb w9, [x20], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
@@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 )
eor w9, w9, w6
eor w7, w7, w9
.endif
- strb w9, [x0], #1 /* store out byte */
- strb w7, [x5], #1 /* store mac byte */
- subs w2, w2, #1
- beq 5b
+ strb w9, [x19], #1 /* store out byte */
+ strb w7, [x24], #1 /* store mac byte */
+ subs w21, w21, #1
+ beq 6b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
- b 7b
+ b 8b
.endm
/*
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 50330f5c3adc..623e74ed1c67 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -30,18 +30,21 @@
.endm
/* prepare for encryption with key in rk[] */
- .macro enc_prepare, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro enc_prepare, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
/* prepare for encryption (again) but with new key in rk[] */
- .macro enc_switch_key, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro enc_switch_key, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
/* prepare for decryption with key in rk[] */
- .macro dec_prepare, rounds, rk, ignore
- load_round_keys \rounds, \rk
+ .macro dec_prepare, rounds, rk, temp
+ mov \temp, \rk
+ load_round_keys \rounds, \temp
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index a68412e1e3a4..483a7130cf0e 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -14,12 +14,12 @@
.align 4
aes_encrypt_block4x:
- encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
+ encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_encrypt_block4x)
aes_decrypt_block4x:
- decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
+ decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7
ret
ENDPROC(aes_decrypt_block4x)
@@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x)
*/
AES_ENTRY(aes_ecb_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 5
- enc_prepare w3, x2, x5
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+
+.Lecbencrestart:
+ enc_prepare w22, x21, x5
.LecbencloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lecbenc1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
bl aes_encrypt_block4x
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ cond_yield_neon .Lecbencrestart
b .LecbencloopNx
.Lecbenc1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lecbencout
.Lecbencloop:
- ld1 {v0.16b}, [x1], #16 /* get next pt block */
- encrypt_block v0, w3, x2, x5, w6
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
+ encrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lecbencloop
.Lecbencout:
- ldp x29, x30, [sp], #16
+ frame_pop
ret
AES_ENDPROC(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 5
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
- dec_prepare w3, x2, x5
+.Lecbdecrestart:
+ dec_prepare w22, x21, x5
.LecbdecloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lecbdec1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
bl aes_decrypt_block4x
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ cond_yield_neon .Lecbdecrestart
b .LecbdecloopNx
.Lecbdec1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lecbdecout
.Lecbdecloop:
- ld1 {v0.16b}, [x1], #16 /* get next ct block */
- decrypt_block v0, w3, x2, x5, w6
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ ld1 {v0.16b}, [x20], #16 /* get next ct block */
+ decrypt_block v0, w22, x21, x5, w6
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lecbdecloop
.Lecbdecout:
- ldp x29, x30, [sp], #16
+ frame_pop
ret
AES_ENDPROC(aes_ecb_decrypt)
@@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt)
*/
AES_ENTRY(aes_cbc_encrypt)
- ld1 {v4.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x6
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+.Lcbcencrestart:
+ ld1 {v4.16b}, [x24] /* get iv */
+ enc_prepare w22, x21, x6
.Lcbcencloop4x:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lcbcenc1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */
eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x6, w7
+ encrypt_block v0, w22, x21, x6, w7
eor v1.16b, v1.16b, v0.16b
- encrypt_block v1, w3, x2, x6, w7
+ encrypt_block v1, w22, x21, x6, w7
eor v2.16b, v2.16b, v1.16b
- encrypt_block v2, w3, x2, x6, w7
+ encrypt_block v2, w22, x21, x6, w7
eor v3.16b, v3.16b, v2.16b
- encrypt_block v3, w3, x2, x6, w7
- st1 {v0.16b-v3.16b}, [x0], #64
+ encrypt_block v3, w22, x21, x6, w7
+ st1 {v0.16b-v3.16b}, [x19], #64
mov v4.16b, v3.16b
+ st1 {v4.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcencrestart
b .Lcbcencloop4x
.Lcbcenc1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lcbcencout
.Lcbcencloop:
- ld1 {v0.16b}, [x1], #16 /* get next pt block */
+ ld1 {v0.16b}, [x20], #16 /* get next pt block */
eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
- encrypt_block v4, w3, x2, x6, w7
- st1 {v4.16b}, [x0], #16
- subs w4, w4, #1
+ encrypt_block v4, w22, x21, x6, w7
+ st1 {v4.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lcbcencloop
.Lcbcencout:
- st1 {v4.16b}, [x5] /* return iv */
+ st1 {v4.16b}, [x24] /* return iv */
+ frame_pop
ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_decrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
+
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
- ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x6
+.Lcbcdecrestart:
+ ld1 {v7.16b}, [x24] /* get iv */
+ dec_prepare w22, x21, x6
.LcbcdecloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lcbcdec1x
- ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
+ ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
- sub x1, x1, #16
+ sub x20, x20, #16
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
- ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
+ ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
+ st1 {v7.16b}, [x24] /* return iv */
+ cond_yield_neon .Lcbcdecrestart
b .LcbcdecloopNx
.Lcbcdec1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lcbcdecout
.Lcbcdecloop:
- ld1 {v1.16b}, [x1], #16 /* get next ct block */
+ ld1 {v1.16b}, [x20], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x6, w7
+ decrypt_block v0, w22, x21, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
- st1 {v0.16b}, [x0], #16
- subs w4, w4, #1
+ st1 {v0.16b}, [x19], #16
+ subs w23, w23, #1
bne .Lcbcdecloop
.Lcbcdecout:
- st1 {v7.16b}, [x5] /* return iv */
- ldp x29, x30, [sp], #16
+ st1 {v7.16b}, [x24] /* return iv */
+ frame_pop
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt)
*/
AES_ENTRY(aes_ctr_encrypt)
- stp x29, x30, [sp, #-16]!
- mov x29, sp
+ frame_push 6
- enc_prepare w3, x2, x6
- ld1 {v4.16b}, [x5]
+ mov x19, x0
+ mov x20, x1
+ mov x21, x2
+ mov x22, x3
+ mov x23, x4
+ mov x24, x5
+
+.Lctrrestart:
+ enc_prepare w22, x21, x6
+ ld1 {v4.16b}, [x24]
umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
- cmn w6, w4 /* 32 bit overflow? */
- bcs .Lctrloop
.LctrloopNx:
- subs w4, w4, #4
+ subs w23, w23, #4
bmi .Lctr1x
+ cmn w6, #4 /* 32 bit overflow? */
+ bcs .Lctr1x
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
dup v7.4s, w6
mov v0.16b, v4.16b
@@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt)
mov v1.s[3], v8.s[0]
mov v2.s[3], v8.s[1]
mov v3.s[3], v8.s[2]
- ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
+ ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
- ld1 {v5.16b}, [x1], #16 /* get 1 input block */
+ ld1 {v5.16b}, [x20], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
- st1 {v0.16b-v3.16b}, [x0], #64
+ st1 {v0.16b-v3.16b}, [x19], #64
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
- cbz w4, .Lctrout
+ cbz w23, .Lctrout
+ st1 {v4.16b}, [x24] /* return next CTR value */
+ cond_yield_neon .Lctrrestart
b .LctrloopNx
.Lctr1x:
- adds w4, w4, #4
+ adds w23, w23, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
- encrypt_block v0, w3, x2, x8, w7
+ encrypt_block v0, w22, x21, x8, w7
adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
@@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt)
bcs .Lctrcarry /* overflow? */
.Lctrcarrydone:
- subs w4, w4, #1
+ subs w23, w23, #1
bmi .Lctrtailblock /* blocks <0 means tail