summaryrefslogtreecommitdiffstats
path: root/crypto/arm64cpuid.pl
diff options
context:
space:
mode:
authorDaniel Hu <Daniel.Hu@arm.com>2022-02-07 10:17:06 +0000
committerPauli <pauli@openssl.org>2022-05-03 14:37:46 +1000
commitb1b2146ded9ce5a84c62f30c6c4a922b449f6c90 (patch)
tree969d007a0e310df537f7f9495b353bbad4e984d4 /crypto/arm64cpuid.pl
parent04904a0fff639c058d38b355d75485ca5dde0a89 (diff)
Acceleration of chacha20 on aarch64 by SVE
This patch accelerates chacha20 on aarch64 when the Scalable Vector Extension (SVE) is supported by the CPU. Tested on a modern micro-architecture with 256-bit SVE, it has the potential to improve performance by up to 20%.

The solution takes a hybrid approach: SVE handles the multi-block portion that fits the SVE vector length, and Neon/Scalar code processes any tail data.

Test result:

With SVE
type        1024 bytes    8192 bytes   16384 bytes
ChaCha20   1596208.13k   1650010.79k   1653151.06k

Without SVE (by Neon/Scalar)
type        1024 bytes    8192 bytes   16384 bytes
chacha20   1355487.91k   1372678.83k   1372662.44k

The assembly code has been reviewed internally by ARM engineer Fangming.Fang@arm.com

Signed-off-by: Daniel Hu <Daniel.Hu@arm.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
Reviewed-by: Paul Dale <pauli@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/17916)
Diffstat (limited to 'crypto/arm64cpuid.pl')
-rwxr-xr-xcrypto/arm64cpuid.pl16
1 files changed, 16 insertions, 0 deletions
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index ad48c53402..0a6cd01e30 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -104,6 +104,22 @@ _armv8_eor3_probe:
ret
.size _armv8_eor3_probe,.-_armv8_eor3_probe
+.globl _armv8_sve_probe
+.type _armv8_sve_probe,%function
+_armv8_sve_probe:
+ AARCH64_VALID_CALL_TARGET // entry of _armv8_sve_probe: executes one SVE instruction and returns (this macro is defined outside this hunk)
+ .inst 0x04a03000 // eor z0.d,z0.d,z0.d -- emitted as a raw word; presumably so assemblers without SVE support can still build this file (confirm)
+ ret // NOTE(review): presumably the caller treats a fault on the previous instruction as SVE being unavailable; that handling is not visible here
+.size _armv8_sve_probe,.-_armv8_sve_probe
+
+.globl _armv8_sve2_probe
+.type _armv8_sve2_probe,%function
+_armv8_sve2_probe:
+ AARCH64_VALID_CALL_TARGET // entry of _armv8_sve2_probe: executes one SVE2 instruction and returns (this macro is defined outside this hunk)
+ .inst 0x04e03400 // xar z0.d,z0.d,z0.d -- NOTE(review): XAR also takes a rotate immediate, omitted here; this word appears to decode with #32 (confirm with a disassembler)
+ ret // NOTE(review): presumably the caller treats a fault on the previous instruction as SVE2 being unavailable; that handling is not visible here
+.size _armv8_sve2_probe,.-_armv8_sve2_probe
+
.globl _armv8_cpuid_probe
.type _armv8_cpuid_probe,%function
_armv8_cpuid_probe: