diff options
author:    Andy Polyakov <appro@openssl.org>  2017-02-03 12:07:16 +0100
committer: Andy Polyakov <appro@openssl.org>  2017-02-06 08:21:42 +0100
commit:    384e6de4c7e35e37fb3d6fbeb32ddcb5eb0d3d3f (patch)
tree:      3eba1883b51094452284e267b6772b261db622b4 /crypto/sha/asm/sha512-x86_64.pl
parent:    e1dbf7f431b996010844e220d3200cbf2122dbb3 (diff)
x86_64 assembly pack: Win64 SEH face-lift.
- harmonize handlers with guidelines and themselves;
- fix some bugs in handlers;
- add missing handlers in chacha and ecp_nistz256 modules;
Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto/sha/asm/sha512-x86_64.pl')
-rwxr-xr-x  crypto/sha/asm/sha512-x86_64.pl  91
1 file changed, 45 insertions(+), 46 deletions(-)
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 5a1cbcf0ca..7a8ed7c7f2 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -301,13 +301,13 @@ $code.=<<___ if ($SZ==4);
 	jnz	.Lssse3_shortcut
 ___
 $code.=<<___;
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$$framesz,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -315,7 +315,7 @@ $code.=<<___;
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 .Lprologue:
 
 	mov	$SZ*0($ctx),$A
@@ -382,13 +382,13 @@ $code.=<<___;
 	jb	.Lloop
 
 	mov	$_rsp,%rsi
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue:
 	ret
 .size	$func,.-$func
@@ -761,13 +761,13 @@ $code.=<<___;
 .align	64
 ${func}_ssse3:
 .Lssse3_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*4`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -775,7 +775,7 @@ ${func}_ssse3:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1082,13 +1082,13 @@ $code.=<<___ if ($win64);
 	movaps	16*$SZ+80(%rsp),%xmm9
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_ssse3:
 	ret
 .size	${func}_ssse3,.-${func}_ssse3
@@ -1105,13 +1105,13 @@ $code.=<<___;
 .align	64
${func}_xop:
 .Lxop_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -1119,7 +1119,7 @@ ${func}_xop:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1459,13 +1459,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_xop:
 	ret
 .size	${func}_xop,.-${func}_xop
@@ -1481,13 +1481,13 @@ $code.=<<___;
 .align	64
${func}_avx:
 .Lavx_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	shl	\$4,%rdx		# num*16
 	sub	\$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
 	lea	($inp,%rdx,$SZ),%rdx	# inp+num*16*$SZ
@@ -1495,7 +1495,7 @@ ${func}_avx:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -1767,13 +1767,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_avx:
 	ret
 .size	${func}_avx,.-${func}_avx
@@ -1832,13 +1832,13 @@ $code.=<<___;
 .align	64
${func}_avx2:
 .Lavx2_shortcut:
+	mov	%rsp,%rax		# copy %rsp
 	push	%rbx
 	push	%rbp
 	push	%r12
 	push	%r13
 	push	%r14
 	push	%r15
-	mov	%rsp,%r11		# copy %rsp
 	sub	\$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp
 	shl	\$4,%rdx		# num*16
 	and	\$-256*$SZ,%rsp		# align stack frame
@@ -1847,7 +1847,7 @@ ${func}_avx2:
 	mov	$ctx,$_ctx		# save ctx, 1st arg
 	mov	$inp,$_inp		# save inp, 2nd arh
 	mov	%rdx,$_end		# save end pointer, "3rd" arg
-	mov	%r11,$_rsp		# save copy of %rsp
+	mov	%rax,$_rsp		# save copy of %rsp
 ___
 $code.=<<___ if ($win64);
 	movaps	%xmm6,16*$SZ+32(%rsp)
@@ -2141,13 +2141,13 @@ $code.=<<___ if ($win64 && $SZ>4);
 	movaps	16*$SZ+112(%rsp),%xmm11
 ___
 $code.=<<___;
-	mov	(%rsi),%r15
-	mov	8(%rsi),%r14
-	mov	16(%rsi),%r13
-	mov	24(%rsi),%r12
-	mov	32(%rsi),%rbp
-	mov	40(%rsi),%rbx
-	lea	48(%rsi),%rsp
+	mov	-48(%rsi),%r15
+	mov	-40(%rsi),%r14
+	mov	-32(%rsi),%r13
+	mov	-24(%rsi),%r12
+	mov	-16(%rsi),%rbp
+	mov	-8(%rsi),%rbx
+	lea	(%rsi),%rsp
 .Lepilogue_avx2:
 	ret
 .size	${func}_avx2,.-${func}_avx2
@@ -2209,7 +2209,6 @@ ___
 $code.=<<___;
 	mov	%rax,%rsi		# put aside Rsp
 	mov	16*$SZ+3*8(%rax),%rax	# pull $_rsp
-	lea	48(%rax),%rax
 	mov	-8(%rax),%rbx
 	mov	-16(%rax),%rbp