author		Andy Polyakov <appro@openssl.org>	2017-02-03 12:07:16 +0100
committer	Andy Polyakov <appro@openssl.org>	2017-02-06 08:21:42 +0100
commit		384e6de4c7e35e37fb3d6fbeb32ddcb5eb0d3d3f (patch)
tree		3eba1883b51094452284e267b6772b261db622b4 /crypto/sha/asm
parent		e1dbf7f431b996010844e220d3200cbf2122dbb3 (diff)
x86_64 assembly pack: Win64 SEH face-lift.
- harmonize handlers with guidelines and themselves;
- fix some bugs in handlers;
- add missing handlers in chacha and ecp_nistz256 modules;
Reviewed-by: Rich Salz <rsalz@openssl.org>
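
In outline, the harmonized convention visible in every hunk below is: the prologue copies the incoming %rsp into a register that the SEH handler can later read back out of the exception CONTEXT record, and the epilogue restores the callee-saved registers at fixed negative offsets from that frame pointer before reinstating %rsp with a single lea. A minimal sketch of the pattern, with the register choice and offsets mirroring the sha1 routines below (an illustration only, not one of the actual routines):

	# prologue: keep the caller's %rsp in %r11, which the SEH handler
	# can recover as context->R11
	mov	%rsp,%r11		# frame pointer ($fp in the perlasm)
	push	%rbx			# lands at -8(%r11)
	push	%rbp			# -16(%r11)
	push	%r12			# -24(%r11)
	push	%r13			# -32(%r11)
	push	%r14			# -40(%r11)
	sub	$64,%rsp		# local frame; leaves %r11 untouched
.Lprologue:
	# ... function body works relative to %rsp ...
	# epilogue: restore through the frame pointer, then reinstate %rsp
	mov	-40(%r11),%r14
	mov	-32(%r11),%r13
	mov	-24(%r11),%r12
	mov	-16(%r11),%rbp
	mov	-8(%r11),%rbx
	lea	(%r11),%rsp
.Lepilogue:
	ret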
Diffstat (limited to 'crypto/sha/asm')
-rwxr-xr-x	crypto/sha/asm/sha1-x86_64.pl	127
-rwxr-xr-x	crypto/sha/asm/sha512-x86_64.pl	91
2 files changed, 105 insertions, 113 deletions
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index f06fa515a2..c0cc7b3898 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -462,7 +462,8 @@ my @V=($A,$B,$C,$D,$E)=("%eax","%ebx","%ecx","%edx","%ebp");	# size optimization
 my @T=("%esi","%edi");
 my $j=0;
 my $rx=0;
-my $K_XX_XX="%r11";
+my $K_XX_XX="%r14";
+my $fp="%r11";
 
 my $_rol=sub { &rol(@_) };
 my $_ror=sub { &ror(@_) };
@@ -483,7 +484,7 @@ $code.=<<___;
 .align	16
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
-	mov	%rsp,%rax
+	mov	%rsp,$fp	# frame pointer
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -492,16 +493,15 @@ _ssse3_shortcut:
 	lea	`-64-($win64?6*16:0)`(%rsp),%rsp
 ___
 $code.=<<___ if ($win64);
-	movaps	%xmm6,-40-6*16(%rax)
-	movaps	%xmm7,-40-5*16(%rax)
-	movaps	%xmm8,-40-4*16(%rax)
-	movaps	%xmm9,-40-3*16(%rax)
-	movaps	%xmm10,-40-2*16(%rax)
-	movaps	%xmm11,-40-1*16(%rax)
+	movaps	%xmm6,-40-6*16($fp)
+	movaps	%xmm7,-40-5*16($fp)
+	movaps	%xmm8,-40-4*16($fp)
+	movaps	%xmm9,-40-3*16($fp)
+	movaps	%xmm10,-40-2*16($fp)
+	movaps	%xmm11,-40-1*16($fp)
 .Lprologue_ssse3:
 ___
 $code.=<<___;
-	mov	%rax,%r14	# original %rsp
 	and	\$-64,%rsp
 	mov	%rdi,$ctx	# reassigned argument
 	mov	%rsi,$inp	# reassigned argument
@@ -908,21 +908,20 @@ $code.=<<___;
 	mov	$E,16($ctx)
 ___
 $code.=<<___ if ($win64);
-	movaps	-40-6*16(%r14),%xmm6
-	movaps	-40-5*16(%r14),%xmm7
-	movaps	-40-4*16(%r14),%xmm8
-	movaps	-40-3*16(%r14),%xmm9
-	movaps	-40-2*16(%r14),%xmm10
-	movaps	-40-1*16(%r14),%xmm11
+	movaps	-40-6*16($fp),%xmm6
+	movaps	-40-5*16($fp),%xmm7
+	movaps	-40-4*16($fp),%xmm8
+	movaps	-40-3*16($fp),%xmm9
+	movaps	-40-2*16($fp),%xmm10
+	movaps	-40-1*16($fp),%xmm11
 ___
 $code.=<<___;
-	lea	(%r14),%rsi
-	mov	-40(%rsi),%r14
-	mov	-32(%rsi),%r13
-	mov	-24(%rsi),%r12
-	mov	-16(%rsi),%rbp
-	mov	-8(%rsi),%rbx
-	lea	(%rsi),%rsp
+	mov	-40($fp),%r14
+	mov	-32($fp),%r13
+	mov	-24($fp),%r12
+	mov	-16($fp),%rbp
+	mov	-8($fp),%rbx
+	lea	($fp),%rsp
 .Lepilogue_ssse3:
 	ret
 .size	sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
@@ -945,7 +944,7 @@ $code.=<<___;
 .align	16
 sha1_block_data_order_avx:
 _avx_shortcut:
-	mov	%rsp,%rax
+	mov	%rsp,$fp
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -955,16 +954,15 @@ _avx_shortcut:
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
-	vmovaps	%xmm6,-40-6*16(%rax)
-	vmovaps	%xmm7,-40-5*16(%rax)
-	vmovaps	%xmm8,-40-4*16(%rax)
-	vmovaps	%xmm9,-40-3*16(%rax)
-	vmovaps	%xmm10,-40-2*16(%rax)
-	vmovaps	%xmm11,-40-1*16(%rax)
+	vmovaps	%xmm6,-40-6*16($fp)
+	vmovaps	%xmm7,-40-5*16($fp)
+	vmovaps	%xmm8,-40-4*16($fp)
+	vmovaps	%xmm9,-40-3*16($fp)
+	vmovaps	%xmm10,-40-2*16($fp)
+	vmovaps	%xmm11,-40-1*16($fp)
 .Lprologue_avx:
 ___
 $code.=<<___;
-	mov	%rax,%r14	# original %rsp
 	and	\$-64,%rsp
 	mov	%rdi,$ctx	# reassigned argument
 	mov	%rsi,$inp	# reassigned argument
@@ -1272,21 +1270,20 @@ $code.=<<___;
 	mov	$E,16($ctx)
 ___
 $code.=<<___ if ($win64);
-	movaps	-40-6*16(%r14),%xmm6
-	movaps	-40-5*16(%r14),%xmm7
-	movaps	-40-4*16(%r14),%xmm8
-	movaps	-40-3*16(%r14),%xmm9
-	movaps	-40-2*16(%r14),%xmm10
-	movaps	-40-1*16(%r14),%xmm11
+	movaps	-40-6*16($fp),%xmm6
+	movaps	-40-5*16($fp),%xmm7
+	movaps	-40-4*16($fp),%xmm8
+	movaps	-40-3*16($fp),%xmm9
+	movaps	-40-2*16($fp),%xmm10
+	movaps	-40-1*16($fp),%xmm11
 ___
 $code.=<<___;
-	lea	(%r14),%rsi
-	mov	-40(%rsi),%r14
-	mov	-32(%rsi),%r13
-	mov	-24(%rsi),%r12
-	mov	-16(%rsi),%rbp
-	mov	-8(%rsi),%rbx
-	lea	(%rsi),%rsp
+	mov	-40($fp),%r14
+	mov	-32($fp),%r13
+	mov	-24($fp),%r12
+	mov	-16($fp),%rbp
+	mov	-8($fp),%rbx
+	lea	($fp),%rsp
 .Lepilogue_avx:
 	ret
 .size	sha1_block_data_order_avx,.-sha1_block_data_order_avx
@@ -1312,7 +1309,7 @@ $code.=<<___;
 .align	16
 sha1_block_data_order_avx2:
 _avx2_shortcut:
-	mov	%rsp,%rax
+	mov	%rsp,$fp
 	push	%rbx
 	push	%rbp
 	push	%r12
@@ -1322,16 +1319,15 @@ _avx2_shortcut:
 ___
 $code.=<<___ if ($win64);
 	lea	-6*16(%rsp),%rsp
-	vmovaps	%xmm6,-40-6*16(%rax)
-	vmovaps	%xmm7,-40-5*16(%rax)
-	vmovaps	%xmm8,-40-4*16(%rax)
-	vmovaps	%xmm9,-40-3*16(%rax)
-	vmovaps	%xmm10,-40-2*16(%rax)
-	vmovaps	%xmm11,-40-1*16(%rax)
+	vmovaps	%xmm6,-40-6*16($fp)
+	vmovaps	%xmm7,-40-5*16($fp)
+	vmovaps	%xmm8,-40-4*16($fp)
+	vmovaps	%xmm9,-40-3*16($fp)
+	vmovaps	%xmm10,-40-2*16($fp)
+	vmovaps	%xmm11,-40-1*16($fp)
 .Lprologue_avx2:
 ___
 $code.=<<___;
-	mov	%rax,%r14	# original %rsp
 	mov	%rdi,$ctx	# reassigned argument
 	mov	%rsi,$inp	# reassigned argument
 	mov	%rdx,$num	# reassigned argument
@@ -1751,21 +1747,20 @@ $code.=<<___;
 	vzeroupper
 ___
 $code.=<<___ if ($win64);
-	movaps	-40-6*16(%r14),%xmm6
-	movaps	-40-5*16(%r14),%xmm7
-	movaps	-40-4*16(%r14),%xmm8
-	movaps	-40-3*16(%r14),%xmm9
-	movaps	-40-2*16(%r14),%xmm10
-	movaps	-40-1*16(%r14),%xmm11
+	movaps	-40-6*16($fp),%xmm6
+	movaps	-40-5*16($fp),%xmm7
+	movaps	-40-4*16($fp),%xmm8
+	movaps	-40-3*16($fp),%xmm9
+	movaps	-40-2*16($fp),%xmm10
+	movaps	-40-1*16($fp),%xmm11
 ___
 $code.=<<___;
-	lea	(%r14),%rsi
-	mov	-40(%rsi),%r14
-	mov	-32(%rsi),%r13
-	mov	-24(%rsi),%r12
-	mov	-16(%rsi),%rbp
-	mov	-8(%rsi),%rbx
-	lea	(%rsi),%rsp
+	mov	-40($fp),%r14
+	mov	-32($fp),%r13
+	mov	-24($fp),%r12
+	mov	-16($fp),%rbp
+	mov	-8($fp),%rbx
+	lea	($fp),%rsp
 .Lepilogue_avx2:
 	ret
 .size	sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
@@ -1908,15 +1903,13 @@ ssse3_handler:
 	cmp	%r10,%rbx		# context->Rip<prologue label
 	jb	.Lcommon_seh_tail
 
-	mov	152($context),%rax	# pull context->Rsp
+	mov	208($context),%rax	# pull context->R11
 
 	mov	4(%r11),%r10d		# HandlerData[1]
 	lea	(%rsi,%r10),%r10	# epilogue label
 	cmp	%r10,%rbx		# context->Rip>=epilogue label
 	jae	.Lcommon_seh_tail
 
-	mov	232($context),%rax	# pull context->R14
-
 	lea	-40-6*16(%rax),%rsi
 	lea	512($context),%rdi	# &context.Xmm6
 	mov	\$12,%ecx
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 5a1cbcf0ca..7a8ed7c7f2 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -301,13 +301,13 @@ $code.=<<___ if ($SZ==4);
 	jnz	.Lssse3_shortcut
 ___
 $code.=<<___;
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$$framesz,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -315,7 +315,7 @@ $code.=<<___;
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 .Lprologue:
 
 	mov	$SZ*0($ctx),$A
@@ -382,13 +382,13 @@ $code.=<<___;
 	jb	.Lloop
 
 	mov	$_rsp,%rsi
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue:
 	ret
 .size	$func,.-$func
@@ -761,13 +761,13 @@ $code.=<<___;
 .align	64
 ${func}_ssse3:
 .Lssse3_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*4`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -775,7 +775,7 @@ ${func}_ssse3:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1082,13 +1082,13 @@ $code.=<<___ if ($win64);
 	movaps	16*$SZ+80(%rsp),%xmm9
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_ssse3:
 	ret
 .size	${func}_ssse3,.-${func}_ssse3
@@ -1105,13 +1105,13 @@ $code.=<<___;
 .align	64
 ${func}_xop:
 .Lxop_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -1119,7 +1119,7 @@ ${func}_xop:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1459,13 +1459,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_xop:
 	ret
 .size	${func}_xop,.-${func}_xop
@@ -1481,13 +1481,13 @@ $code.=<<___;
 .align	64
 ${func}_avx:
 .Lavx_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -1495,7 +1495,7 @@ ${func}_avx:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1767,13 +1767,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_avx:
 	ret
 .size	${func}_avx,.-${func}_avx
@@ -1832,13 +1832,13 @@ $code.=<<___;
 .align	64
 ${func}_avx2:
 .Lavx2_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	sub	\$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp
 	shl	\$4,%rdx		# num*16
 	and	\$-256*$SZ,%rsp		# align stack frame
@@ -1847,7 +1847,7 @@ ${func}_avx2:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -2141,13 +2141,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_avx2:
 	ret
 .size	${func}_avx2,.-${func}_avx2
@@ -2209,7 +2209,6 @@ ___
 $code.=<<___;
 	mov	%rax,%rsi		# put aside Rsp
 	mov	16*$SZ+3*8(%rax),%rax	# pull $_rsp
-	lea	48(%rax),%rax
 
 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp
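
The handler-side hunk in sha1-x86_64.pl above is the counterpart of this convention: the saved registers now sit at fixed offsets from the frame pointer, so the unwind handler pulls that pointer straight out of the exception CONTEXT record, whose x86_64 GPR slots live at fixed byte offsets (Rsp at 152, R11 at 208, R14 at 232, matching the "# pull context->..." comments), instead of first recovering R14 and only then locating the saved XMM area. The two loads it now needs, quoted from the hunk:

	mov	208($context),%rax	# CONTEXT.R11, i.e. the frame pointer
	lea	-40-6*16(%rax),%rsi	# saved %xmm6-%xmm11 sit just below the GPR saves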