diff options
author | Daniel Hu <Daniel.Hu@arm.com> | 2022-02-07 10:17:06 +0000 |
---|---|---|
committer | Pauli <pauli@openssl.org> | 2022-05-03 14:37:46 +1000 |
commit | b1b2146ded9ce5a84c62f30c6c4a922b449f6c90 (patch) | |
tree | 969d007a0e310df537f7f9495b353bbad4e984d4 /crypto/armcap.c | |
parent | 04904a0fff639c058d38b355d75485ca5dde0a89 (diff) |
Acceleration of chacha20 on aarch64 by SVE
This patch accelerates chacha20 on aarch64 when the Scalable Vector Extension
(SVE) is supported by the CPU. Tested on a modern micro-architecture with
256-bit SVE, it has the potential to improve performance by up to 20%.
The solution takes a hybrid approach: SVE handles the multi-block chunks that
fit the SVE vector length, and Neon/scalar code processes any tail data.
Test result:
With SVE
type 1024 bytes 8192 bytes 16384 bytes
ChaCha20 1596208.13k 1650010.79k 1653151.06k
Without SVE (by Neon/Scalar)
type 1024 bytes 8192 bytes 16384 bytes
chacha20 1355487.91k 1372678.83k 1372662.44k
The assembly code has been reviewed internally by
an ARM engineer, Fangming.Fang@arm.com.
Signed-off-by: Daniel Hu <Daniel.Hu@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/17916)
Diffstat (limited to 'crypto/armcap.c')
-rw-r--r-- | crypto/armcap.c | 20 |
1 file changed, 20 insertions, 0 deletions
diff --git a/crypto/armcap.c b/crypto/armcap.c index c50322f504..91ba45c950 100644 --- a/crypto/armcap.c +++ b/crypto/armcap.c @@ -57,6 +57,8 @@ void _armv8_sm3_probe(void); void _armv8_sm4_probe(void); void _armv8_sha512_probe(void); unsigned int _armv8_cpuid_probe(void); +void _armv8_sve_probe(void); +void _armv8_sve2_probe(void); void _armv8_rng_probe(void); size_t OPENSSL_rndr_asm(unsigned char *buf, size_t len); @@ -175,8 +177,10 @@ static unsigned long getauxval(unsigned long key) # define HWCAP_CE_SM3 (1 << 18) # define HWCAP_CE_SM4 (1 << 19) # define HWCAP_CE_SHA512 (1 << 21) +# define HWCAP_SVE (1 << 22) /* AT_HWCAP2 */ # define HWCAP2 26 +# define HWCAP2_SVE2 (1 << 1) # define HWCAP2_RNG (1 << 16) # endif @@ -270,6 +274,12 @@ void OPENSSL_cpuid_setup(void) # endif } # ifdef __aarch64__ + if (getauxval(HWCAP) & HWCAP_SVE) + OPENSSL_armcap_P |= ARMV8_SVE; + + if (getauxval(HWCAP2) & HWCAP2_SVE2) + OPENSSL_armcap_P |= ARMV8_SVE2; + if (getauxval(HWCAP2) & HWCAP2_RNG) OPENSSL_armcap_P |= ARMV8_RNG; # endif @@ -331,6 +341,16 @@ void OPENSSL_cpuid_setup(void) } # ifdef __aarch64__ if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sve_probe(); + OPENSSL_armcap_P |= ARMV8_SVE; + } + + if (sigsetjmp(ill_jmp, 1) == 0) { + _armv8_sve2_probe(); + OPENSSL_armcap_P |= ARMV8_SVE2; + } + + if (sigsetjmp(ill_jmp, 1) == 0) { _armv8_rng_probe(); OPENSSL_armcap_P |= ARMV8_RNG; } |