summaryrefslogtreecommitdiffstats
path: root/crypto
diff options
context:
space:
mode:
authorJean-Philippe Boivin <jpboivin@me.com>2021-05-17 16:38:14 -0400
committerTomas Mraz <tomas@openssl.org>2021-05-21 13:05:59 +0200
commitca28c2422a7b32644161caa55f818dfafd8eeb9a (patch)
tree12cd77759c1d61610c73823eeb82773a00c756eb /crypto
parentfdb4cbd20f50e60fc266d9de4b83890e995d3502 (diff)
Properly restore XMM registers in ChaCha20's AVX-512(VL) assembly
Reviewed-by: Paul Dale <pauli@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/15315) (cherry picked from commit 6d3f798cba8075e700003aaf34f1e72bb930086c)
Diffstat (limited to 'crypto')
-rwxr-xr-xcrypto/chacha/asm/chacha-x86_64.pl52
1 files changed, 42 insertions, 10 deletions
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 227ee59ff2..2ad3c1a38f 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -471,7 +471,7 @@ sub SSSE3ROUND { # critical path is 20 "SIMD ticks" per round
&por ($b,$t);
}
-my $xframe = $win64 ? 32+8 : 8;
+my $xframe = $win64 ? 160+8 : 8;
$code.=<<___;
.type ChaCha20_ssse3,\@function,5
@@ -2499,7 +2499,7 @@ sub AVX512ROUND { # critical path is 14 "SIMD ticks" per round
&vprold ($b,$b,7);
}
-my $xframe = $win64 ? 32+8 : 8;
+my $xframe = $win64 ? 160+8 : 8;
$code.=<<___;
.type ChaCha20_avx512,\@function,5
@@ -2515,8 +2515,16 @@ ChaCha20_avx512:
sub \$64+$xframe,%rsp
___
$code.=<<___ if ($win64);
- movaps %xmm6,-0x28(%r9)
- movaps %xmm7,-0x18(%r9)
+ movaps %xmm6,-0xa8(%r9)
+ movaps %xmm7,-0x98(%r9)
+ movaps %xmm8,-0x88(%r9)
+ movaps %xmm9,-0x78(%r9)
+ movaps %xmm10,-0x68(%r9)
+ movaps %xmm11,-0x58(%r9)
+ movaps %xmm12,-0x48(%r9)
+ movaps %xmm13,-0x38(%r9)
+ movaps %xmm14,-0x28(%r9)
+ movaps %xmm15,-0x18(%r9)
.Lavx512_body:
___
$code.=<<___;
@@ -2683,8 +2691,16 @@ $code.=<<___;
vzeroall
___
$code.=<<___ if ($win64);
- movaps -0x28(%r9),%xmm6
- movaps -0x18(%r9),%xmm7
+ movaps -0xa8(%r9),%xmm6
+ movaps -0x98(%r9),%xmm7
+ movaps -0x88(%r9),%xmm8
+ movaps -0x78(%r9),%xmm9
+ movaps -0x68(%r9),%xmm10
+ movaps -0x58(%r9),%xmm11
+ movaps -0x48(%r9),%xmm12
+ movaps -0x38(%r9),%xmm13
+ movaps -0x28(%r9),%xmm14
+ movaps -0x18(%r9),%xmm15
___
$code.=<<___;
lea (%r9),%rsp
@@ -2711,8 +2727,16 @@ ChaCha20_avx512vl:
sub \$64+$xframe,%rsp
___
$code.=<<___ if ($win64);
- movaps %xmm6,-0x28(%r9)
- movaps %xmm7,-0x18(%r9)
+ movaps %xmm6,-0xa8(%r9)
+ movaps %xmm7,-0x98(%r9)
+ movaps %xmm8,-0x88(%r9)
+ movaps %xmm9,-0x78(%r9)
+ movaps %xmm10,-0x68(%r9)
+ movaps %xmm11,-0x58(%r9)
+ movaps %xmm12,-0x48(%r9)
+ movaps %xmm13,-0x38(%r9)
+ movaps %xmm14,-0x28(%r9)
+ movaps %xmm15,-0x18(%r9)
.Lavx512vl_body:
___
$code.=<<___;
@@ -2836,8 +2860,16 @@ $code.=<<___;
vzeroall
___
$code.=<<___ if ($win64);
- movaps -0x28(%r9),%xmm6
- movaps -0x18(%r9),%xmm7
+ movaps -0xa8(%r9),%xmm6
+ movaps -0x98(%r9),%xmm7
+ movaps -0x88(%r9),%xmm8
+ movaps -0x78(%r9),%xmm9
+ movaps -0x68(%r9),%xmm10
+ movaps -0x58(%r9),%xmm11
+ movaps -0x48(%r9),%xmm12
+ movaps -0x38(%r9),%xmm13
+ movaps -0x28(%r9),%xmm14
+ movaps -0x18(%r9),%xmm15
___
$code.=<<___;
lea (%r9),%rsp