summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xConfigure7
-rw-r--r--TABLE86
-rw-r--r--crypto/aes/Makefile4
-rw-r--r--crypto/aes/asm/aesni-x86.pl663
-rw-r--r--crypto/aes/asm/aesni-x86_64.pl963
-rw-r--r--crypto/engine/Makefile6
-rw-r--r--crypto/engine/eng_aesni.c402
-rw-r--r--crypto/engine/eng_all.c3
-rw-r--r--crypto/engine/engine.h1
9 files changed, 2087 insertions, 48 deletions
diff --git a/Configure b/Configure
index 89142eb22e..2c28be4fb7 100755
--- a/Configure
+++ b/Configure
@@ -121,11 +121,11 @@ my $tlib="-lnsl -lsocket";
my $bits1="THIRTY_TWO_BIT ";
my $bits2="SIXTY_FOUR_BIT ";
-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
my $x86_elf_asm="$x86_asm:elf";
-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
@@ -485,7 +485,7 @@ my %table=(
#
# Win64 targets, WIN64I denotes IA-64 and WIN64A - AMD64
"VC-WIN64I","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64I::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:ia64cpuid.o:ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o:::::::ias:win32",
-"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o::ml64:win32",
+"VC-WIN64A","cl:-W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN64A::SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN:x86_64cpuid.o:bn_asm.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o::ml64:win32",
# x86 Win32 target defaults to ANSI API, if you want UNICODE, complement
# 'perl Configure VC-WIN32' with '-DUNICODE -D_UNICODE'
"VC-WIN32","cl:-W3 -WX -Gs0 -GF -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE:::WIN32::BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN ${x86_gcc_opts}:${x86_asm}:win32n:win32",
@@ -1370,6 +1370,7 @@ if ($rmd160_obj =~ /\.o$/)
if ($aes_obj =~ /\.o$/)
{
$cflags.=" -DAES_ASM";
+ $aes_obj =~ s/\s*aesni\-x86\.o// if ($no_sse2);
}
else {
$aes_obj=$aes_enc;
diff --git a/TABLE b/TABLE
index 6205386776..e4325459e9 100644
--- a/TABLE
+++ b/TABLE
@@ -228,7 +228,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -259,7 +259,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -290,7 +290,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -321,7 +321,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -383,7 +383,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -653,7 +653,7 @@ $multilib =
*** VC-WIN32
$cc = cl
-$cflags = -W3 -WX -Gs0 -GF -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE
+$cflags = -W3 -WX -Gs0 -GF -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE
$unistd =
$thread_cflag =
$sys_id = WIN32
@@ -662,7 +662,7 @@ $bn_ops = BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -684,7 +684,7 @@ $multilib =
*** VC-WIN64A
$cc = cl
-$cflags = -W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE
+$cflags = -W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE
$unistd =
$thread_cflag =
$sys_id = WIN64A
@@ -693,7 +693,7 @@ $bn_ops = SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN
$cpuid_obj = x86_64cpuid.o
$bn_obj = bn_asm.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -715,7 +715,7 @@ $multilib =
*** VC-WIN64I
$cc = cl
-$cflags = -W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE
+$cflags = -W3 -Gs0 -Gy -nologo -DOPENSSL_SYSNAME_WIN32 -DWIN32_LEAN_AND_MEAN -DL_ENDIAN -DUNICODE -D_UNICODE -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE
$unistd =
$thread_cflag =
$sys_id = WIN64I
@@ -941,7 +941,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -972,7 +972,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1003,7 +1003,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1127,7 +1127,7 @@ $bn_ops = BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1220,7 +1220,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK BF_PTR2 DES_INT DES_UNROL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -1282,7 +1282,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1530,7 +1530,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1561,7 +1561,7 @@ $bn_ops = BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1685,7 +1685,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1716,7 +1716,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1809,7 +1809,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1840,7 +1840,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1902,7 +1902,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1933,7 +1933,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1964,7 +1964,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2119,7 +2119,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR2 DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -2150,7 +2150,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2181,7 +2181,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR2 DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -2336,7 +2336,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2863,7 +2863,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3204,7 +3204,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3266,7 +3266,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3359,7 +3359,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3638,7 +3638,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR2 DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -3700,7 +3700,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT EXPORT_V
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3731,7 +3731,7 @@ $bn_ops = SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -4320,7 +4320,7 @@ $bn_ops = DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4351,7 +4351,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4599,7 +4599,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4692,7 +4692,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR2 DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -4723,7 +4723,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK BF_PTR2 DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -4971,7 +4971,7 @@ $bn_ops = BN_LLONG MD2_CHAR RC4_INDEX DES_PTR DES_RISC1 DES_UNROLL
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -5002,7 +5002,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
index c501a43a8f..3517465bd0 100644
--- a/crypto/aes/Makefile
+++ b/crypto/aes/Makefile
@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S
aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl
$(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl
+ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
aes-x86_64.s: asm/aes-x86_64.pl
$(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@
+aesni-x86_64.s: asm/aesni-x86_64.pl
+ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@
aes-sparcv9.s: asm/aes-sparcv9.pl
$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
diff --git a/crypto/aes/asm/aesni-x86.pl b/crypto/aes/asm/aesni-x86.pl
new file mode 100644
index 0000000000..a9339a4c38
--- /dev/null
+++ b/crypto/aes/asm/aesni-x86.pl
@@ -0,0 +1,663 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for Intel AES-NI extension. In
+# OpenSSL context it's used with Intel engine, but can also be used as
+# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
+# details].
+
+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
+ # generates drop-in replacement for
+ # crypto/aes/asm/aes-586.pl:-)
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+$movekey = eval($RREFIX eq "aseni" ? "*movaps" : "*movups");
+
+$len="eax";
+$rounds="ecx";
+$key="edx";
+$inp="esi";
+$out="edi";
+$rounds_="ebx";
+$key_="ebp";
+
+$inout0="xmm0";
+$inout1="xmm1";
+$inout2="xmm2";
+$rndkey0="xmm3";
+$rndkey1="xmm4";
+$ivec="xmm5";
+$in0="xmm6";
+$in1="xmm7";
+
+sub _aesni_generate1 # folded loop
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt1");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &lea ($key,&DWP(16,$key));
+ &pxor ($inout0,$rndkey0);
+ &dec ($rounds);
+ &set_label("${p}1_loop",16);
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &dec ($rounds);
+ &lea ($key,&DWP(16,$key));
+ &$movekey ($rndkey1,&QWP(0,$key));
+ &jnz (&label("${p}1_loop"));
+ eval"&aes${p}last ($inout0,$rndkey1)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt1");
+}
+
+sub aesni_generate1 # fully unrolled loop
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt1");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(0x10,$key));
+ &cmp ($rounds,12);
+ &pxor ($inout0,$rndkey0);
+ &$movekey ($rndkey0,&QWP(0x20,$key));
+ &lea ($key,&DWP(0x30,$key));
+ &jb (&label("${p}128"));
+ &lea ($key,&DWP(0x20,$key));
+ &je (&label("${p}192"));
+ &lea ($key,&DWP(0x20,$key));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(-0x40,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(-0x30,$key));
+ &set_label("${p}192");
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(-0x20,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(-0x10,$key));
+ &set_label("${p}128");
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x10,$key));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x20,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x30,$key));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x40,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x50,$key));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x60,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x70,$key));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p}last ($inout0,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt1");
+}
+
+&aesni_generate1("enc");
+# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
+&function_begin_B("${PREFIX}_encrypt");
+ &mov ("eax",&wparam(0));
+ &mov ($key,&wparam(2));
+ &movups ($inout0,&QWP(0,"eax"));
+ &mov ($rounds,&DWP(240,$key));
+ &mov ("eax",&wparam(1));
+ &call ("_aesni_encrypt1");
+ &movups (&QWP(0,"eax"),$inout0);
+ &ret ();
+&function_end_B("${PREFIX}_encrypt");
+
+&aesni_generate1("dec");
+# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
+&function_begin_B("${PREFIX}_decrypt");
+ &mov ("eax",&wparam(0));
+ &mov ($key,&wparam(2));
+ &movups ($inout0,&QWP(0,"eax"));
+ &mov ($rounds,&DWP(240,$key));
+ &mov ("eax",&wparam(1));
+ &call ("_aesni_decrypt1");
+ &movups (&QWP(0,"eax"),$inout0);
+ &ret ();
+&function_end_B("${PREFIX}_decrypt");
+
+# _aesni_[en|de]crypt3 are private interfaces, 3 denotes interleave
+# factor. Why 3x? Even though aes[enc|dec] latency is 6, it turned
+# out that it can be scheduled only every *second* cycle. Thus 3x
+# interleave is the one providing optimal utilization, i.e. when
+# subroutine's throughput is virtually same as of non-interleaved
+# subroutine for number of input blocks up to 3. This is why it
+# handles even double-block inputs. Larger interleave factor would
+# perform suboptimally on shorter inputs...
+
+sub aesni_generate3
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt3");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &shr ($rounds,1);
+ &lea ($key,&DWP(32,$key));
+ &pxor ($inout0,$rndkey0);
+ &pxor ($inout1,$rndkey0);
+ &dec ($rounds);
+ &pxor ($inout2,$rndkey0);
+ &jmp (&label("${p}3_loop"));
+ &set_label("${p}3_loop",16);
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ eval"&aes${p} ($inout1,$rndkey1)";
+ &dec ($rounds);
+ eval"&aes${p} ($inout2,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(16,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ &lea ($key,&DWP(32,$key));
+ eval"&aes${p} ($inout1,$rndkey0)";
+ eval"&aes${p} ($inout2,$rndkey0)";
+ &jnz (&label("${p}3_loop"));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ eval"&aes${p} ($inout1,$rndkey1)";
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p}last ($inout0,$rndkey0)";
+ eval"&aes${p}last ($inout1,$rndkey0)";
+ eval"&aes${p}last ($inout2,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt3");
+}
+&aesni_generate3("enc") if ($PREFIX eq "aesni");
+&aesni_generate3("dec");
+
+if ($PREFIX eq "aesni") {
+# void aesni_ecb_encrypt (const void *in, void *out,
+# size_t length, const AES_KEY *key,
+# int enc);
+
+&function_begin("aesni_ecb_encrypt");
+ &mov ($inp,&wparam(0));
+ &mov ($out,&wparam(1));
+ &mov ($len,&wparam(2));
+ &mov ($key,&wparam(3));
+ &mov ($rounds,&wparam(4));
+ &cmp ($len,16);
+ &jb (&label("ecb_ret"));
+ &and ($len,-16);
+ &test ($rounds,$rounds)
+ &mov ($rounds,&DWP(240,$key));
+ &mov ($key_,$key); # backup $key
+ &mov ($rounds_,$rounds); # backup $rounds
+ &jz (&label("ecb_decrypt"));
+
+ &sub ($len,0x30);
+ &jc (&label("ecb_enc_tail"));
+ jmp (&label("ecb_enc_loop3"));
+
+&set_label("ecb_enc_loop3",16);
+ &movups ($inout0,&QWP(0,$inp));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &movups ($inout2,&QWP(0x20,$inp));
+ &lea ($inp,&DWP(0x30,$inp));
+ &call ("_aesni_encrypt3");
+ &movups (&QWP(0,$out),$inout0);
+ &sub ($len,0x30);
+ &movups (&QWP(0x10,$out),$inout0);
+ &mov ($key,$key_); # restore $key
+ &movups (&QWP(0x20,$out),$inout0);
+ &mov ($rounds,$rounds_); # restore $rounds
+ &lea ($out,&DWP(0x30,$out));
+ &jnc (&label("ecb_enc_loop3"));
+
+&set_label("ecb_enc_tail");
+ &add ($len,0x30);
+ &jz (&label("ecb_ret"));
+
+ &cmp ($len,0x10);
+ &movups ($inout0,&QWP(0,$inp));
+ je (&label("ecb_enc_one"));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &call ("_aesni_encrypt3");
+ &movups (&QWP(0,$out),$inout0);
+ &movups (&QWP(0x10,$out),$inout1);
+ jmp (&label("ecb_ret"));
+
+&set_label("ecb_enc_one",16);
+ &call ("_aesni_encrypt1");
+ &movups (&QWP(0,$out),$inout0);
+ &jmp (&label("ecb_ret"));
+
+&set_label("ecb_decrypt",16);
+ &sub ($len,0x30);
+ &jc (&label("ecb_dec_tail"));
+ jmp (&label("ecb_dec_loop3"));
+
+&set_label("ecb_dec_loop3",16);
+ &movups ($inout0,&QWP(0,$inp));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &movups ($inout2,&QWP(0x20,$inp));
+ &call ("_aesni_decrypt3");
+ &movups (&QWP(0,$out),$inout0);
+ &sub ($len,0x30);
+ &lea ($inp,&DWP(0x30,$inp));
+ &movups (&QWP(0x10,$out),$inout0);
+ &mov ($key,$key_); # restore $key
+ &movups (&QWP(0x20,$out),$inout0);
+ &mov ($rounds,$rounds_); # restore $rounds
+ &lea ($out,&DWP(0x30,$out));
+ &jnc (&label("ecb_dec_loop3"));
+
+&set_label("ecb_dec_tail");
+ &add ($len,0x30);
+ &jz (&label("ecb_ret"));
+
+ &cmp ($len,0x10);
+ &movups ($inout0,&QWP(0,$inp));
+ je (&label("ecb_dec_one"));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &call ("_aesni_decrypt3");
+ &movups (&QWP(0,$out),$inout0);
+ &movups (&QWP(0x10,$out),$inout1);
+ jmp (&label("ecb_ret"));
+
+&set_label("ecb_dec_one",16);
+ &call ("_aesni_decrypt1");
+ &movups (&QWP(0,$out),$inout0);
+
+&set_label("ecb_ret");
+&function_end("aesni_ecb_encrypt");
+}
+
+# void $PREFIX_cbc_encrypt (const void *inp, void *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivp,const int enc);
+&function_begin("${PREFIX}_cbc_encrypt");
+ &mov ($inp,&wparam(0));
+ &mov ($out,&wparam(1));
+ &mov ($len,&wparam(2));
+ &mov ($key,&wparam(3));
+ &test ($len,$len);
+ &mov ($key_,&wparam(4));
+ &je (&label("cbc_ret"));
+
+ &cmp (&wparam(5),0);
+ &movups ($ivec,&QWP(0,$key_)); # load IV
+ &mov ($rounds,&DWP(240,$key));
+ &mov ($key_,$key); # backup $key
+ &mov ($rounds_,$rounds); # backup $rounds
+ &je (&label("cbc_decrypt"));
+
+ &movaps ($inout0,$ivec);
+ &cmp ($len,16);
+ &jb (&label("cbc_enc_tail"));
+ &sub ($len,16);
+ &jmp (&label("cbc_enc_loop"));
+
+&set_label("cbc_enc_loop",16);
+ &movups ($ivec,&QWP(0,$inp));
+ &lea ($inp,&DWP(16,$inp));
+ &pxor ($inout0,$ivec);
+ &call ("_aesni_encrypt1");
+ &sub ($len,16);
+ &mov ($rounds,$rounds_); # restore $rounds
+ &mov ($key,$key_); # restore $key
+ &movups (&QWP(0,$out),$inout0);
+ &lea ($out,&DWP(16,$out));
+ &jnc (&label("cbc_enc_loop"));
+ &add ($len,16);
+ &jnz (&label("cbc_enc_tail"));
+ &movaps ($ivec,$inout0);
+ &jmp (&label("cbc_ret"));
+
+&set_label("cbc_enc_tail");
+ &mov ("ecx",$len); # zaps $rounds
+ &data_word(0xA4F3F689); # rep movsb
+ &mov ("ecx",16); # zero tail
+ &sub ("ecx",$len);
+ &xor ("eax","eax"); # zaps $len
+ &data_word(0xAAF3F689); # rep stosb
+ &lea ($out,&DWP(-16,$out)); # rewind $out by 1 block
+ &mov ($rounds,$rounds_); # restore $rounds
+ &mov ($inp,$out); # $inp and $out are the same
+ &mov ($key,$key_); # restore $key
+ &jmp (&label("cbc_enc_loop"));
+
+&set_label("cbc_decrypt",16);
+ &sub ($len,0x30);
+ &jc (&label("cbc_dec_tail"));
+ &jmp (&label("cbc_dec_loop3"));
+
+&set_label("cbc_dec_loop3",16);
+ &movups ($inout0,&QWP(0,$inp));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &movups ($inout2,&QWP(0x20,$inp));
+ &movaps ($in0,$inout0);
+ &movaps ($in1,$inout1);
+ &call ("_aesni_decrypt3");
+ &sub ($len,0x30);
+ &lea ($inp,&DWP(0x30,$inp));
+ &pxor ($inout0,$ivec);
+ &pxor ($inout1,$in0);
+ &movups ($ivec,&QWP(0x20,$inp));
+ &pxor ($inout2,$in1);
+ &movups (&QWP(0,$out),$inout0);
+ &mov ($rounds,$rounds_) # restore $rounds
+ &movups (&QWP(0x10,$out),$inout1);
+ &mov ($key,$key_); # restore $key
+ &movups (&QWP(0x20,$out),$inout2);
+ &lea ($out,&DWP(0x30,$out));
+ &jnc (&label("cbc_dec_loop3"));
+
+&set_label("cbc_dec_tail");
+ &add ($len,0x30);
+ &jz (&label("cbc_ret"));
+
+ &movups ($inout0,&QWP(0,$inp));
+ &cmp ($len,0x10);
+ &movaps ($in0,$inout0);
+ &jbe (&label("cbc_dec_one"));
+ &movups ($inout1,&QWP(0x10,$inp));
+ &cmp ($len,0x20);
+ &movaps ($in1,$inout1);
+ &jbe (&label("cbc_dec_two"));
+ &movups ($inout2,&QWP(0x20,$inp));
+ &call ("_aesni_decrypt3");
+ &pxor ($inout0,$ivec);
+ &movups ($ivec,&QWP(0x20,$inp));
+ &pxor ($inout1,$in0);
+ &pxor ($inout2,$in1);
+ &movups (&QWP(0,$out),$inout0);
+ &movups (&QWP(0x10,$out),$inout1);
+ &movaps ($inout0,$inout2);
+ &lea ($out,&DWP(0x20,$out));
+ &jmp (&label("cbc_dec_tail_collected"));
+
+&set_label("cbc_dec_one");
+ &call ("_aesni_decrypt1");
+ &pxor ($inout0,$ivec);
+ &movaps ($ivec,$in0);
+ &jmp (&label("cbc_dec_tail_collected"));
+
+&set_label("cbc_dec_two");
+ &call ("_aesni_decrypt3");
+ &pxor ($inout0,$ivec);
+ &pxor ($inout1,$in0);
+ &movups (&QWP(0,$out),$inout0);
+ &movaps ($inout0,$inout1);
+ &movaps ($ivec,$in1);
+ &lea ($out,&DWP(0x10,$out));
+
+&set_label("cbc_dec_tail_collected");
+ &and ($len,15);
+ &jnz (&label("cbc_dec_tail_partial"));
+ &movups (&QWP(0,$out),$inout0);
+ &jmp (&label("cbc_ret"));
+
+&set_label("cbc_dec_tail_partial");
+ &mov ($key_,"esp");
+ &sub ("esp",16);
+ &and ("esp",-16);
+ &movaps (&QWP(0,"esp"),$inout0);
+ &mov ($inp,"esp");
+ &mov ("ecx",$len);
+ &data_word(0xA4F3F689); # rep movsb
+ &mov ("esp",$key_);
+
+&set_label("cbc_ret");
+ &mov ($key_,&wparam(4));
+ &movups (&QWP(0,$key_),$ivec); # output IV
+&function_end("${PREFIX}_cbc_encrypt");
+
+# Mechanical port from aesni-x86_64.pl.
+#
+# _aesni_set_encrypt_key is private interface,
+# input:
+# "eax" const unsigned char *userKey
+# $rounds int bits
+# $key AES_KEY *key
+# output:
+# "eax" return code
+# $round rounds
+
+&function_begin_B("_aesni_set_encrypt_key");
+ &test ("eax","eax");
+ &jz (&label("bad_pointer"));
+ &test ($key,$key);
+ &jz (&label("bad_pointer"));
+
+ &movups ("xmm0",&