path: root/crypto/bn/asm/x86_64-mont5.pl
author    Andy Polyakov <appro@openssl.org>  2016-03-04 11:39:11 +0100
committer Andy Polyakov <appro@openssl.org>  2016-03-07 14:58:14 +0100
commit    adc4f1fc25b2cac90076f1e1695b05b7aeeae501
tree      14ea30fdb1fb103c04e340c9e256cb8a94104bfb
parent    56cd71b46e5fe4f403c6247d83c2ad88e537b6c3
bn/asm/x86[_64]-mont*.pl: complement alloca with page-walking.
Some OSes, *cough*-dows, insist on the stack being "wired" to physical memory in a strictly sequential manner, i.e. if a stack allocation spans two pages, then a reference to the farthest one can be punished by SEGV. But page walking can do good even on other OSes, because it guarantees that a villain thread hits the guard page before it can do damage to an innocent one...
Reviewed-by: Rich Salz <rsalz@openssl.org>
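For illustration only, here is a minimal C analogue of the page walk this patch adds in assembly. It is a sketch, not part of the patch; page_walk(), PAGE_SIZE and STACK_BYTES are hypothetical names, and 4K pages are assumed. The idea: after a large stack-style allocation, touch one byte in every page, starting at the offset nearest the old stack pointer and stepping down one page at a time, so the guard page is always the next page hit.

/*
 * Sketch of the page-walk idea, assuming a typical platform with
 * 4K pages.  page_walk(), PAGE_SIZE and STACK_BYTES are illustrative
 * names, not taken from the patch.
 */
#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE   4096
#define STACK_BYTES (8 * PAGE_SIZE)

/* Touch one byte in every page of buf, highest offset first, mirroring
 * the .L*_page_walk loops: probe, step down one page, stop once the
 * page at offset 0 has been probed. */
static void page_walk(volatile unsigned char *buf, size_t len)
{
    size_t off = (len - 1) & ~(size_t)(PAGE_SIZE - 1);

    for (;;) {
        (void)buf[off];         /* one read per page "wires" it */
        if (off == 0)
            break;              /* lowest page probed; done */
        off -= PAGE_SIZE;
    }
}

int main(void)
{
    unsigned char frame[STACK_BYTES];   /* stands in for the alloca'd area */

    page_walk(frame, sizeof(frame));
    frame[0] = 1;                       /* whole span is now safely committed */
    printf("probed %d pages\n", (int)(STACK_BYTES / PAGE_SIZE));
    return 0;
}

The assembly in the diff below expresses the same loop with flags arithmetic: the `sub \$4096` that takes the offset below zero sets the carry flag, so `jnc` falls through exactly after the page at the new %rsp has been probed.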
Diffstat (limited to 'crypto/bn/asm/x86_64-mont5.pl')
-rwxr-xr-x  crypto/bn/asm/x86_64-mont5.pl  61
1 file changed, 60 insertions(+), 1 deletion(-)
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 2e8c9db32c..938e170818 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -115,6 +115,20 @@ $code.=<<___;
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
+ # Some OSes, *cough*-dows, insist on the stack being "wired" to
+ # physical memory in a strictly sequential manner, i.e. if a
+ # stack allocation spans two pages, then a reference to the
+ # farthest one can be punished by SEGV. But page walking can
+ # do good even on other OSes, because it guarantees that a
+ # villain thread hits the guard page before it can do damage
+ # to an innocent one...
+ sub %rsp,%rax # %rax held the original %rsp, so this is the allocation size
+ and \$-4096,%rax # round down to the highest page-aligned offset
+.Lmul_page_walk:
+ mov (%rsp,%rax),%r11 # touch one word in the page at offset %rax
+ sub \$4096,%rax # step down one page; sets CF once past offset 0
+ .byte 0x2e # predict non-taken (CS-override branch hint)
+ jnc .Lmul_page_walk # loop until the page at the new %rsp is probed
+
lea 128($bp),%r12 # reassign $bp (+size optimization)
___
$bp="%r12";
@@ -469,6 +483,15 @@ $code.=<<___;
sub %r11,%rsp
.Lmul4xsp_done:
and \$-64,%rsp
+ mov %rax,%r11 # page-walk the new allocation, as in .Lmul_page_walk
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmul4x_page_walk
+
neg $num
mov %rax,40(%rsp)
@@ -1058,6 +1081,15 @@ $code.=<<___;
sub %r11,%rsp
.Lpwr_sp_done:
and \$-64,%rsp
+ mov %rax,%r11 # page-walk, as in .Lmul_page_walk
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lpwr_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lpwr_page_walk
+
mov $num,%r10
neg $num
@@ -2028,7 +2060,16 @@ bn_from_mont8x:
sub %r11,%rsp
.Lfrom_sp_done:
and \$-64,%rsp
- mov $num,%r10
+ mov %rax,%r11 # page-walk, as in .Lmul_page_walk
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lfrom_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lfrom_page_walk
+
+ mov $num,%r10
neg $num
##############################################################
@@ -2173,6 +2214,15 @@ bn_mulx4x_mont_gather5:
sub %r11,%rsp
.Lmulx4xsp_done:
and \$-64,%rsp # ensure alignment
+ mov %rax,%r11 # page-walk, as in .Lmul_page_walk
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmulx4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmulx4x_page_walk
+
##############################################################
# Stack layout
# +0 -num
@@ -2619,6 +2669,15 @@ bn_powerx5:
sub %r11,%rsp
.Lpwrx_sp_done:
and \$-64,%rsp
+ mov %rax,%r11 # page-walk, as in .Lmul_page_walk
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lpwrx_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lpwrx_page_walk
+
mov $num,%r10
neg $num