summary | refs | log | tree | commit | diff | stats
path: root/crypto/sha/asm/sha512-x86_64.pl
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2017-02-03 12:07:16 +0100
committer: Andy Polyakov <appro@openssl.org> 2017-02-06 08:21:42 +0100
commit: 384e6de4c7e35e37fb3d6fbeb32ddcb5eb0d3d3f (patch)
tree: 3eba1883b51094452284e267b6772b261db622b4 /crypto/sha/asm/sha512-x86_64.pl
parent: e1dbf7f431b996010844e220d3200cbf2122dbb3 (diff)
x86_64 assembly pack: Win64 SEH face-lift.
- harmonize handlers with guidelines and themselves;
- fix some bugs in handlers;
- add missing handlers in chacha and ecp_nistz256 modules;

Reviewed-by: Rich Salz <rsalz@openssl.org>
Diffstat (limited to 'crypto/sha/asm/sha512-x86_64.pl')
-rwxr-xr-x crypto/sha/asm/sha512-x86_64.pl | 91
1 files changed, 45 insertions, 46 deletions
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 5a1cbcf0ca..7a8ed7c7f2 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -301,13 +301,13 @@ $code.=<<___ if ($SZ==4);
jnz .Lssse3_shortcut
___
$code.=<<___;
+ mov %rsp,%rax # copy %rsp
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
- mov %rsp,%r11 # copy %rsp
shl \$4,%rdx # num*16
sub \$$framesz,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
@@ -315,7 +315,7 @@ $code.=<<___;
mov $ctx,$_ctx # save ctx, 1st arg
mov $inp,$_inp # save inp, 2nd arh
mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
+ mov %rax,$_rsp # save copy of %rsp
.Lprologue:
mov $SZ*0($ctx),$A
@@ -382,13 +382,13 @@ $code.=<<___;
jb .Lloop
mov $_rsp,%rsi
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue:
ret
.size $func,.-$func
@@ -761,13 +761,13 @@ $code.=<<___;
.align 64
${func}_ssse3:
.Lssse3_shortcut:
+ mov %rsp,%rax # copy %rsp
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
- mov %rsp,%r11 # copy %rsp
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*4`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
@@ -775,7 +775,7 @@ ${func}_ssse3:
mov $ctx,$_ctx # save ctx, 1st arg
mov $inp,$_inp # save inp, 2nd arh
mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
+ mov %rax,$_rsp # save copy of %rsp
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
@@ -1082,13 +1082,13 @@ $code.=<<___ if ($win64);
movaps 16*$SZ+80(%rsp),%xmm9
___
$code.=<<___;
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_ssse3:
ret
.size ${func}_ssse3,.-${func}_ssse3
@@ -1105,13 +1105,13 @@ $code.=<<___;
.align 64
${func}_xop:
.Lxop_shortcut:
+ mov %rsp,%rax # copy %rsp
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
- mov %rsp,%r11 # copy %rsp
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
@@ -1119,7 +1119,7 @@ ${func}_xop:
mov $ctx,$_ctx # save ctx, 1st arg
mov $inp,$_inp # save inp, 2nd arh
mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
+ mov %rax,$_rsp # save copy of %rsp
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
@@ -1459,13 +1459,13 @@ $code.=<<___ if ($win64 && $SZ>4);
movaps 16*$SZ+112(%rsp),%xmm11
___
$code.=<<___;
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_xop:
ret
.size ${func}_xop,.-${func}_xop
@@ -1481,13 +1481,13 @@ $code.=<<___;
.align 64
${func}_avx:
.Lavx_shortcut:
+ mov %rsp,%rax # copy %rsp
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
- mov %rsp,%r11 # copy %rsp
shl \$4,%rdx # num*16
sub \$`$framesz+$win64*16*($SZ==4?4:6)`,%rsp
lea ($inp,%rdx,$SZ),%rdx # inp+num*16*$SZ
@@ -1495,7 +1495,7 @@ ${func}_avx:
mov $ctx,$_ctx # save ctx, 1st arg
mov $inp,$_inp # save inp, 2nd arh
mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
+ mov %rax,$_rsp # save copy of %rsp
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
@@ -1767,13 +1767,13 @@ $code.=<<___ if ($win64 && $SZ>4);
movaps 16*$SZ+112(%rsp),%xmm11
___
$code.=<<___;
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_avx:
ret
.size ${func}_avx,.-${func}_avx
@@ -1832,13 +1832,13 @@ $code.=<<___;
.align 64
${func}_avx2:
.Lavx2_shortcut:
+ mov %rsp,%rax # copy %rsp
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
- mov %rsp,%r11 # copy %rsp
sub \$`2*$SZ*$rounds+4*8+$win64*16*($SZ==4?4:6)`,%rsp
shl \$4,%rdx # num*16
and \$-256*$SZ,%rsp # align stack frame
@@ -1847,7 +1847,7 @@ ${func}_avx2:
mov $ctx,$_ctx # save ctx, 1st arg
mov $inp,$_inp # save inp, 2nd arh
mov %rdx,$_end # save end pointer, "3rd" arg
- mov %r11,$_rsp # save copy of %rsp
+ mov %rax,$_rsp # save copy of %rsp
___
$code.=<<___ if ($win64);
movaps %xmm6,16*$SZ+32(%rsp)
@@ -2141,13 +2141,13 @@ $code.=<<___ if ($win64 && $SZ>4);
movaps 16*$SZ+112(%rsp),%xmm11
___
$code.=<<___;
- mov (%rsi),%r15
- mov 8(%rsi),%r14
- mov 16(%rsi),%r13
- mov 24(%rsi),%r12
- mov 32(%rsi),%rbp
- mov 40(%rsi),%rbx
- lea 48(%rsi),%rsp
+ mov -48(%rsi),%r15
+ mov -40(%rsi),%r14
+ mov -32(%rsi),%r13
+ mov -24(%rsi),%r12
+ mov -16(%rsi),%rbp
+ mov -8(%rsi),%rbx
+ lea (%rsi),%rsp
.Lepilogue_avx2:
ret
.size ${func}_avx2,.-${func}_avx2
@@ -2209,7 +2209,6 @@ ___
$code.=<<___;
mov %rax,%rsi # put aside Rsp
mov 16*$SZ+3*8(%rax),%rax # pull $_rsp
- lea 48(%rax),%rax
mov -8(%rax),%rbx
mov -16(%rax),%rbp