From 56ba280ccd71b2739ef74827c7a2bf71ced3ab80 Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Thu, 12 Jun 2014 21:45:41 +0200
Subject: Facilitate back-porting of AESNI and SHA modules.

Fix SEH and stack handling in Win64 build.

(cherry picked from commit 977f32e85241cba8be53e44dade32231e8a91718)
---
 crypto/sha/asm/sha1-586.pl      | 12 ++++++++----
 crypto/sha/asm/sha1-x86_64.pl   | 12 ++++++++++--
 crypto/sha/asm/sha256-586.pl    |  8 +++++---
 crypto/sha/asm/sha512-x86_64.pl | 11 ++++++++---
 4 files changed, 31 insertions(+), 12 deletions(-)

(limited to 'crypto/sha')

diff --git a/crypto/sha/asm/sha1-586.pl b/crypto/sha/asm/sha1-586.pl
index acc4f639a7..81252a62e9 100644
--- a/crypto/sha/asm/sha1-586.pl
+++ b/crypto/sha/asm/sha1-586.pl
@@ -128,6 +128,8 @@ $ymm=1 if ($xmm && !$ymm && $ARGV[0] eq "win32" &&
 		`ml 2>&1` =~ /Version ([0-9]+)\./ &&
 		$1>=10);	# first version supporting AVX
 
+$shaext=$xmm;	### set to zero if compiling for 1.0.1
+
 &external_label("OPENSSL_ia32cap_P") if ($xmm);
@@ -307,7 +309,7 @@ if ($alt) {
 &function_begin("sha1_block_data_order");
 if ($xmm) {
-  &static_label("shaext_shortcut");
+  &static_label("shaext_shortcut")	if ($shaext);
   &static_label("ssse3_shortcut");
   &static_label("avx_shortcut")		if ($ymm);
   &static_label("K_XX_XX");
@@ -325,8 +327,10 @@ if ($xmm) {
 	&mov	($C,&DWP(8,$T));
 	&test	($A,1<<24);		# check FXSR bit
 	&jz	(&label("x86"));
-	&test	($C,1<<29);		# check SHA bit
-	&jnz	(&label("shaext_shortcut"));
+	if ($shaext) {
+	  &test	($C,1<<29);		# check SHA bit
+	  &jnz	(&label("shaext_shortcut"));
+	}
 	if ($ymm) {
 		&and	($D,1<<28);		# mask AVX bit
 		&and	($A,1<<30);		# mask "Intel CPU" bit
@@ -405,7 +409,7 @@ if ($xmm) {
 &function_end("sha1_block_data_order");
 
 if ($xmm) {
-{
+if ($shaext) {
 ######################################################################
 # Intel SHA Extensions implementation of SHA1 update function.
 #
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index ea288c15d5..6dc64a2ead 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -107,6 +107,9 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=1;	### set to zero if compiling for 1.0.1
+$avx=1		if (!$shaext && $avx);
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
@@ -245,7 +248,8 @@ sha1_block_data_order:
 	mov	OPENSSL_ia32cap_P+8(%rip),%r10d
 	test	\$`1<<9`,%r8d		# check SSSE3 bit
 	jz	.Lialu
-
+___
+$code.=<<___ if ($shaext);
 	test	\$`1<<29`,%r10d		# check SHA bit
 	jnz	_shaext_shortcut
 ___
@@ -321,7 +325,7 @@ $code.=<<___;
 	ret
 .size	sha1_block_data_order,.-sha1_block_data_order
 ___
-{{{
+if ($shaext) {{{
 ######################################################################
 # Intel SHA Extensions implementation of SHA1 update function.
 #
@@ -1956,9 +1960,13 @@ ssse3_handler:
 	.rva	.LSEH_begin_sha1_block_data_order
 	.rva	.LSEH_end_sha1_block_data_order
 	.rva	.LSEH_info_sha1_block_data_order
+___
+$code.=<<___ if ($shaext);
 	.rva	.LSEH_begin_sha1_block_data_order_shaext
 	.rva	.LSEH_end_sha1_block_data_order_shaext
 	.rva	.LSEH_info_sha1_block_data_order_shaext
+___
+$code.=<<___;
 	.rva	.LSEH_begin_sha1_block_data_order_ssse3
 	.rva	.LSEH_end_sha1_block_data_order_ssse3
 	.rva	.LSEH_info_sha1_block_data_order_ssse3
diff --git a/crypto/sha/asm/sha256-586.pl b/crypto/sha/asm/sha256-586.pl
index 09648a8207..ee094a9214 100644
--- a/crypto/sha/asm/sha256-586.pl
+++ b/crypto/sha/asm/sha256-586.pl
@@ -82,6 +82,8 @@ if ($xmm && !$avx && $ARGV[0] eq "win32" &&
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=$xmm;	### set to zero if compiling for 1.0.1
+
 $unroll_after = 64*4;	# If pre-evicted from L1P cache first spin of
 			# fully unrolled loop was measured to run about
 			# 3-4x slower. If slowdown coefficient is N and
@@ -205,8 +207,8 @@ sub BODY_00_15() {
 	&jz	($unroll_after?&label("no_xmm"):&label("loop"));
 	&and	("ecx",1<<30);		# mask "Intel CPU" bit
 	&and	("ebx",1<<28|1<<9);	# mask AVX and SSSE3 bits
-	&test	("edx",1<<29)		if ($xmm);	# check for SHA
-	&jnz	(&label("shaext"))	if ($xmm);
+	&test	("edx",1<<29)		if ($shaext);	# check for SHA
+	&jnz	(&label("shaext"))	if ($shaext);
 	&or	("ecx","ebx");
 	&and	("ecx",1<<28|1<<30);
 	&cmp	("ecx",1<<28|1<<30);
@@ -505,7 +507,7 @@ my @AH=($A,$K256);
 	&function_end_A();
 }
 					if (!$i386 && $xmm) {{{
-{
+if ($shaext) {
 ######################################################################
 # Intel SHA Extensions implementation of SHA256 update function.
 #
diff --git a/crypto/sha/asm/sha512-x86_64.pl b/crypto/sha/asm/sha512-x86_64.pl
index 01698c40cf..0556a8f36a 100755
--- a/crypto/sha/asm/sha512-x86_64.pl
+++ b/crypto/sha/asm/sha512-x86_64.pl
@@ -123,6 +123,9 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
 	$avx = ($1>=10) + ($1>=11);
 }
 
+$shaext=1;	### set to zero if compiling for 1.0.1
+$avx=1		if (!$shaext && $avx);
+
 open OUT,"| \"$^X\" $xlate $flavour $output";
 *STDOUT=*OUT;
@@ -259,7 +262,7 @@ $code.=<<___ if ($SZ==4 || $avx);
 	mov	4(%r11),%r10d
 	mov	8(%r11),%r11d
 ___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
 	test	\$`1<<29`,%r11d		# check for SHA
 	jnz	_shaext_shortcut
 ___
@@ -518,7 +521,7 @@ ___
 ######################################################################
 # SIMD code paths
 #
-if ($SZ==4) {{{
+if ($SZ==4 && $shaext) {{{
 ######################################################################
 # Intel SHA Extensions implementation of SHA256 update function.
 #
@@ -2295,10 +2298,12 @@ shaext_handler:
 	.rva	.LSEH_end_$func
 	.rva	.LSEH_info_$func
 ___
-$code.=<<___ if ($SZ==4);
+$code.=<<___ if ($SZ==4 && $shaext);
 	.rva	.LSEH_begin_${func}_shaext
 	.rva	.LSEH_end_${func}_shaext
 	.rva	.LSEH_info_${func}_shaext
+___
+$code.=<<___ if ($SZ==4);
 	.rva	.LSEH_begin_${func}_ssse3
 	.rva	.LSEH_end_${func}_ssse3
 	.rva	.LSEH_info_${func}_ssse3
--
cgit v1.2.3
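
The back-porting knob works the same way in all four scripts: a single $shaext flag gates the emission of the SHA-extension code path, the CPUID dispatch that jumps to it, and (on Win64) the SEH table entries that register its unwind handler. What follows is a minimal, self-contained sketch of that emission pattern, not part of the patch itself: the dispatch instructions and SEH label names are copied from the sha1-x86_64.pl hunks above, while the $sha_bit helper variable is introduced here purely for illustration (the real scripts encode the constant via the perlasm translator's backtick evaluation instead).

#!/usr/bin/env perl
# Minimal sketch of the $shaext gating pattern used throughout the
# patch (illustrative only, not part of the OpenSSL sources).

my $shaext  = 1;	### set to zero if compiling for 1.0.1
my $sha_bit = 1<<29;	# CPUID leaf-7 EBX bit advertising SHA extensions

my $code = "";

# Runtime dispatch: probe the SHA CPUID bit only if the shaext code
# path was emitted; otherwise execution falls through to SSSE3/AVX.
$code .= <<___ if ($shaext);
	test	\$$sha_bit,%r10d	# check SHA bit
	jnz	_shaext_shortcut
___

# Win64 SEH table: .rva entries may be emitted only if the labels
# they reference exist, or the module fails to build; hence the
# same $shaext guard.
$code .= <<___ if ($shaext);
	.rva	.LSEH_begin_sha1_block_data_order_shaext
	.rva	.LSEH_end_sha1_block_data_order_shaext
	.rva	.LSEH_info_sha1_block_data_order_shaext
___

print $code;

With $shaext set to 0, both fragments vanish together, which is the point of routing every SHA-extension artifact through one flag: a 1.0.1 back-port drops the feature cleanly instead of leaving a CPUID branch or an SEH entry pointing at labels that were never generated.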