summaryrefslogtreecommitdiffstats
path: root/crypto/armcap.c
diff options
context:
space:
mode:
author: Daniel Hu <Daniel.Hu@arm.com> 2022-02-07 10:17:06 +0000
committer: Pauli <pauli@openssl.org> 2022-05-03 14:37:46 +1000
commit: b1b2146ded9ce5a84c62f30c6c4a922b449f6c90 (patch)
tree: 969d007a0e310df537f7f9495b353bbad4e984d4 /crypto/armcap.c
parent: 04904a0fff639c058d38b355d75485ca5dde0a89 (diff)
Acceleration of chacha20 on aarch64 by SVE
This patch accelerates chacha20 on aarch64 when the Scalable Vector Extension (SVE) is supported by the CPU. Tested on a modern micro-architecture with 256-bit SVE, it has the potential to improve performance by up to 20%.

The solution takes a hybrid approach. SVE will handle multi-blocks that fit the SVE vector length, with Neon/Scalar to process any tail data.

Test result:

With SVE
type        1024 bytes    8192 bytes    16384 bytes
ChaCha20    1596208.13k   1650010.79k   1653151.06k

Without SVE (by Neon/Scalar)
type        1024 bytes    8192 bytes    16384 bytes
chacha20    1355487.91k   1372678.83k   1372662.44k

The assembly code has been reviewed internally by ARM engineer Fangming.Fang@arm.com

Signed-off-by: Daniel Hu <Daniel.Hu@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/17916)
Diffstat (limited to 'crypto/armcap.c')
-rw-r--r--  crypto/armcap.c  20
1 files changed, 20 insertions, 0 deletions
diff --git a/crypto/armcap.c b/crypto/armcap.c
index c50322f504..91ba45c950 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -57,6 +57,8 @@ void _armv8_sm3_probe(void);
void _armv8_sm4_probe(void);
void _armv8_sha512_probe(void);
unsigned int _armv8_cpuid_probe(void);
+void _armv8_sve_probe(void);
+void _armv8_sve2_probe(void);
void _armv8_rng_probe(void);
size_t OPENSSL_rndr_asm(unsigned char *buf, size_t len);
@@ -175,8 +177,10 @@ static unsigned long getauxval(unsigned long key)
# define HWCAP_CE_SM3 (1 << 18)
# define HWCAP_CE_SM4 (1 << 19)
# define HWCAP_CE_SHA512 (1 << 21)
+# define HWCAP_SVE (1 << 22)
/* AT_HWCAP2 */
# define HWCAP2 26
+# define HWCAP2_SVE2 (1 << 1)
# define HWCAP2_RNG (1 << 16)
# endif
@@ -270,6 +274,12 @@ void OPENSSL_cpuid_setup(void)
# endif
}
# ifdef __aarch64__
+ if (getauxval(HWCAP) & HWCAP_SVE)
+ OPENSSL_armcap_P |= ARMV8_SVE;
+
+ if (getauxval(HWCAP2) & HWCAP2_SVE2)
+ OPENSSL_armcap_P |= ARMV8_SVE2;
+
if (getauxval(HWCAP2) & HWCAP2_RNG)
OPENSSL_armcap_P |= ARMV8_RNG;
# endif
@@ -331,6 +341,16 @@ void OPENSSL_cpuid_setup(void)
}
# ifdef __aarch64__
if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_sve_probe();
+ OPENSSL_armcap_P |= ARMV8_SVE;
+ }
+
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_sve2_probe();
+ OPENSSL_armcap_P |= ARMV8_SVE2;
+ }
+
+ if (sigsetjmp(ill_jmp, 1) == 0) {
_armv8_rng_probe();
OPENSSL_armcap_P |= ARMV8_RNG;
}