author    Andy Polyakov <appro@openssl.org>    2013-12-18 22:12:08 +0100
committer Andy Polyakov <appro@openssl.org>    2013-12-18 22:57:14 +0100
commit    fc9c9e47f7165eb24e3f07eb69efc490cce9152c (patch)
tree      f4a8712ec8519988cf2c8dc62e8beb297345ce32 /crypto/sha
parent    68e6ac4379034e0e89550fcecb80b8a6734623fd (diff)
sha1-x86_64.pl: harmonize Win64 SE handlers for SIMD code paths.
(and ensure stack alignment in the process) (cherry picked from commit fc0503a25cd638b93f7af04640c20042e0329b3b)
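The point of the change is that all three SIMD prologues now build the same frame: the entry %rsp is kept in %r14, xmm6..xmm11 are spilled at fixed offsets -40-6*16 .. -40-1*16 below it, and the five pushed general-purpose registers sit at -40 .. -8, so a single Win64 SE handler can unwind any of them. The C sketch below is illustrative only and not part of the patch: the function name restore_nonvolatile is hypothetical, and it assumes the standard Windows CONTEXT record layout that the assembler handler indexes by hand (Rbx at 144, R12/R13/R14 at 216/224/232, Xmm6 at 512).

#include <windows.h>
#include <string.h>

/* Hedged sketch of the unwind step the shared handler performs once
 * context->Rip lies between the prologue and epilogue labels.  The
 * real handler is the assembler ssse3_handler emitted by this script;
 * the name and simplified shape here are illustrative only. */
static void restore_nonvolatile(CONTEXT *context)
{
    /* context->R14 holds the %rsp value the function was entered with */
    unsigned char *frame = (unsigned char *)context->R14;

    /* xmm6..xmm11 were spilled contiguously below the pushed registers;
     * this mirrors the handler's "mov $12,%ecx; rep movsq" copy */
    memcpy(&context->Xmm6, frame - 40 - 6*16, 6*16);

    /* the five pushed GPRs sit immediately below the return address */
    context->Rbx = *(DWORD64 *)(frame -  8);
    context->Rbp = *(DWORD64 *)(frame - 16);
    context->R12 = *(DWORD64 *)(frame - 24);
    context->R13 = *(DWORD64 *)(frame - 32);
    context->R14 = *(DWORD64 *)(frame - 40);
}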
Diffstat (limited to 'crypto/sha')
-rwxr-xr-x  crypto/sha/asm/sha1-x86_64.pl  134
1 file changed, 77 insertions, 57 deletions
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index 2c89b1feea..b128913dbf 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -344,21 +344,26 @@ $code.=<<___;
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
+ mov %rsp,%rax
push %rbx
push %rbp
push %r12
+ push %r13 # redundant, done to share Win64 SE handler
+ push %r14
lea `-64-($win64?6*16:0)`(%rsp),%rsp
___
$code.=<<___ if ($win64);
- movaps %xmm6,64+0(%rsp)
- movaps %xmm7,64+16(%rsp)
- movaps %xmm8,64+32(%rsp)
- movaps %xmm9,64+48(%rsp)
- movaps %xmm10,64+64(%rsp)
- movaps %xmm11,64+80(%rsp)
+ movaps %xmm6,-40-6*16(%rax)
+ movaps %xmm7,-40-5*16(%rax)
+ movaps %xmm8,-40-4*16(%rax)
+ movaps %xmm9,-40-3*16(%rax)
+ movaps %xmm10,-40-2*16(%rax)
+ movaps %xmm11,-40-1*16(%rax)
.Lprologue_ssse3:
___
$code.=<<___;
+ mov %rax,%r14 # original %rsp
+ and \$-64,%rsp
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
mov %rdx,$num # reassigned argument
@@ -753,19 +758,21 @@ $code.=<<___;
mov $E,16($ctx)
___
$code.=<<___ if ($win64);
- movaps 64+0(%rsp),%xmm6
- movaps 64+16(%rsp),%xmm7
- movaps 64+32(%rsp),%xmm8
- movaps 64+48(%rsp),%xmm9
- movaps 64+64(%rsp),%xmm10
- movaps 64+80(%rsp),%xmm11
+ movaps -40-6*16(%r14),%xmm6
+ movaps -40-5*16(%r14),%xmm7
+ movaps -40-4*16(%r14),%xmm8
+ movaps -40-3*16(%r14),%xmm9
+ movaps -40-2*16(%r14),%xmm10
+ movaps -40-1*16(%r14),%xmm11
___
$code.=<<___;
- lea `64+($win64?6*16:0)`(%rsp),%rsi
- mov 0(%rsi),%r12
- mov 8(%rsi),%rbp
- mov 16(%rsi),%rbx
- lea 24(%rsi),%rsp
+ lea (%r14),%rsi
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_ssse3:
ret
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
@@ -788,25 +795,30 @@ $code.=<<___;
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
+ mov %rsp,%rax
push %rbx
push %rbp
push %r12
+ push %r13 # redundant, done to share Win64 SE handler
+ push %r14
lea `-64-($win64?6*16:0)`(%rsp),%rsp
+ vzeroupper
___
$code.=<<___ if ($win64);
- movaps %xmm6,64+0(%rsp)
- movaps %xmm7,64+16(%rsp)
- movaps %xmm8,64+32(%rsp)
- movaps %xmm9,64+48(%rsp)
- movaps %xmm10,64+64(%rsp)
- movaps %xmm11,64+80(%rsp)
+ vmovaps %xmm6,-40-6*16(%rax)
+ vmovaps %xmm7,-40-5*16(%rax)
+ vmovaps %xmm8,-40-4*16(%rax)
+ vmovaps %xmm9,-40-3*16(%rax)
+ vmovaps %xmm10,-40-2*16(%rax)
+ vmovaps %xmm11,-40-1*16(%rax)
.Lprologue_avx:
___
$code.=<<___;
+ mov %rax,%r14 # original %rsp
+ and \$-64,%rsp
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
mov %rdx,$num # reassigned argument
- vzeroupper
shl \$6,$num
add $inp,$num
@@ -1110,19 +1122,21 @@ $code.=<<___;
mov $E,16($ctx)
___
$code.=<<___ if ($win64);
- movaps 64+0(%rsp),%xmm6
- movaps 64+16(%rsp),%xmm7
- movaps 64+32(%rsp),%xmm8
- movaps 64+48(%rsp),%xmm9
- movaps 64+64(%rsp),%xmm10
- movaps 64+80(%rsp),%xmm11
+ movaps -40-6*16(%r14),%xmm6
+ movaps -40-5*16(%r14),%xmm7
+ movaps -40-4*16(%r14),%xmm8
+ movaps -40-3*16(%r14),%xmm9
+ movaps -40-2*16(%r14),%xmm10
+ movaps -40-1*16(%r14),%xmm11
___
$code.=<<___;
- lea `64+($win64?6*16:0)`(%rsp),%rsi
- mov 0(%rsi),%r12
- mov 8(%rsi),%rbp
- mov 16(%rsi),%rbx
- lea 24(%rsi),%rsp
+ lea (%r14),%rsi
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_avx:
ret
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
@@ -1148,28 +1162,29 @@ $code.=<<___;
.align 16
sha1_block_data_order_avx2:
_avx2_shortcut:
+ mov %rsp,%rax
push %rbx
push %rbp
push %r12
push %r13
push %r14
- lea (%rsp),%r14
+ vzeroupper
___
$code.=<<___ if ($win64);
lea -6*16(%rsp),%rsp
- movaps %xmm6,-6*16(%r14)
- movaps %xmm7,-5*16(%r14)
- movaps %xmm8,-4*16(%r14)
- movaps %xmm9,-3*16(%r14)
- movaps %xmm10,-2*16(%r14)
- movaps %xmm11,-1*16(%r14)
+ vmovaps %xmm6,-40-6*16(%rax)
+ vmovaps %xmm7,-40-5*16(%rax)
+ vmovaps %xmm8,-40-4*16(%rax)
+ vmovaps %xmm9,-40-3*16(%rax)
+ vmovaps %xmm10,-40-2*16(%rax)
+ vmovaps %xmm11,-40-1*16(%rax)
.Lprologue_avx2:
___
$code.=<<___;
+ mov %rax,%r14 # original %rsp
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
mov %rdx,$num # reassigned argument
- vzeroupper
lea -640(%rsp),%rsp
shl \$6,$num
@@ -1586,21 +1601,21 @@ $code.=<<___;
vzeroupper
___
$code.=<<___ if ($win64);
- movaps -6*16(%r14),%xmm6
- movaps -5*16(%r14),%xmm7
- movaps -4*16(%r14),%xmm8
- movaps -3*16(%r14),%xmm9
- movaps -2*16(%r14),%xmm10
- movaps -1*16(%r14),%xmm11
+ movaps -40-6*16(%r14),%xmm6
+ movaps -40-5*16(%r14),%xmm7
+ movaps -40-4*16(%r14),%xmm8
+ movaps -40-3*16(%r14),%xmm9
+ movaps -40-2*16(%r14),%xmm10
+ movaps -40-1*16(%r14),%xmm11
___
$code.=<<___;
lea (%r14),%rsi
- mov 0(%rsi),%r14
- mov 8(%rsi),%r13
- mov 16(%rsi),%r12
- mov 24(%rsi),%rbp
- mov 32(%rsi),%rbx
- lea 40(%rsi),%rsp
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_avx2:
ret
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
@@ -1711,18 +1726,23 @@ ssse3_handler:
cmp %r10,%rbx # context->Rip>=epilogue label
jae .Lcommon_seh_tail
- lea 64(%rax),%rsi
+ mov 232($context),%rax # pull context->R14
+
+ lea -40-6*16(%rax),%rsi
lea 512($context),%rdi # &context.Xmm6
mov \$12,%ecx
.long 0xa548f3fc # cld; rep movsq
- lea `24+64+6*16`(%rax),%rax # adjust stack pointer
mov -8(%rax),%rbx
mov -16(%rax),%rbp
mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
.Lcommon_seh_tail:
mov 8(%rax),%rdi