summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2011-06-28 14:49:35 +0000
committerAndy Polyakov <appro@openssl.org>2011-06-28 14:49:35 +0000
commitfbe2e28911070a8556ad05bdfd6b6871d4a82244 (patch)
tree630188a677ff340d121965f4d5593e0747d08752
parent84968e25f31c76b0e9043002b43bdcc6cad96fc2 (diff)
AES-NI backport from HEAD. Note that e_aes.c doesn't implement all modes
from HEAD yet, more will be back-ported later.
-rwxr-xr-xConfigure4
-rw-r--r--TABLE146
-rw-r--r--crypto/aes/Makefile4
-rw-r--r--crypto/aes/asm/aesni-x86.pl2187
-rw-r--r--crypto/aes/asm/aesni-x86_64.pl3062
-rw-r--r--crypto/evp/e_aes.c387
-rw-r--r--crypto/modes/ctr128.c89
-rw-r--r--crypto/modes/modes.h9
8 files changed, 5741 insertions, 147 deletions
diff --git a/Configure b/Configure
index 962a748033..bd9a7ba789 100755
--- a/Configure
+++ b/Configure
@@ -123,11 +123,11 @@ my $tlib="-lnsl -lsocket";
my $bits1="THIRTY_TWO_BIT ";
my $bits2="SIXTY_FOUR_BIT ";
-my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
+my $x86_asm="x86cpuid.o:bn-586.o co-586.o x86-mont.o:des-586.o crypt586.o:aes-586.o aesni-x86.o:bf-586.o:md5-586.o:sha1-586.o sha256-586.o sha512-586.o:cast-586.o:rc4-586.o:rmd-586.o:rc5-586.o:wp_block.o wp-mmx.o:cmll-x86.o";
my $x86_elf_asm="$x86_asm:elf";
-my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
+my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o::aes-x86_64.o aesni-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o:::sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o:::::::void";
my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::void";
diff --git a/TABLE b/TABLE
index c6a5ef75db..22bb1d602e 100644
--- a/TABLE
+++ b/TABLE
@@ -228,7 +228,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -259,7 +259,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -290,7 +290,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -321,7 +321,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -383,7 +383,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -538,7 +538,7 @@ $bn_ops = DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -724,7 +724,7 @@ $bn_ops = BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -961,6 +961,68 @@ $ranlib =
$arflags = -X64
$multilib =
+*** android
+$cc = gcc
+$cflags = -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj =
+$des_obj =
+$aes_obj =
+$bf_obj =
+$md5_obj =
+$sha1_obj =
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$perlasm_scheme = void
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag =
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib =
+
+*** android-armv7
+$cc = gcc
+$cflags = -march=armv7-a -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj = bn_asm.o armv4-mont.o
+$des_obj =
+$aes_obj = aes_cbc.o aes-armv4.o
+$bf_obj =
+$md5_obj =
+$sha1_obj = sha1-armv4-large.o sha256-armv4.o sha512-armv4.o
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$perlasm_scheme = void
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag =
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib =
+
*** aux3-gcc
$cc = gcc
$cflags = -O2 -DTERMIO
@@ -1003,7 +1065,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1034,7 +1096,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1065,7 +1127,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1189,7 +1251,7 @@ $bn_ops = BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1282,7 +1344,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -1344,7 +1406,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1406,7 +1468,7 @@ $bn_ops = BN_LLONG RC4_INDEX EXPORT_VAR_AS_FN RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1685,7 +1747,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1716,7 +1778,7 @@ $bn_ops = BN_LLONG RC4_INT RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1840,7 +1902,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1871,7 +1933,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1964,7 +2026,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -1995,7 +2057,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2119,7 +2181,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2150,7 +2212,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2181,7 +2243,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -2212,7 +2274,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2367,7 +2429,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -2398,7 +2460,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -2429,7 +2491,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -2584,7 +2646,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3111,7 +3173,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3452,7 +3514,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3514,7 +3576,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3607,7 +3669,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3886,7 +3948,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -3948,7 +4010,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT EXPORT_V
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -3979,7 +4041,7 @@ $bn_ops = SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT EXPORT_VAR_AS_FN
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -4537,7 +4599,7 @@ $bn_ops = DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4568,7 +4630,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4816,7 +4878,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -4909,7 +4971,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -4940,7 +5002,7 @@ $bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o
$des_obj =
-$aes_obj = aes-x86_64.o
+$aes_obj = aes-x86_64.o aesni-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
@@ -5250,7 +5312,7 @@ $bn_ops = BN_LLONG MD2_CHAR RC4_INDEX DES_PTR DES_RISC1 DES_UNROLL
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
@@ -5281,7 +5343,7 @@ $bn_ops = BN_LLONG DES_PTR DES_RISC1 DES_UNROLL RC4_INDEX MD2_INT
$cpuid_obj = x86cpuid.o
$bn_obj = bn-586.o co-586.o x86-mont.o
$des_obj = des-586.o crypt586.o
-$aes_obj = aes-586.o
+$aes_obj = aes-586.o aesni-x86.o
$bf_obj = bf-586.o
$md5_obj = md5-586.o
$sha1_obj = sha1-586.o sha256-586.o sha512-586.o
diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
index c501a43a8f..3517465bd0 100644
--- a/crypto/aes/Makefile
+++ b/crypto/aes/Makefile
@@ -50,9 +50,13 @@ aes-ia64.s: asm/aes-ia64.S
aes-586.s: asm/aes-586.pl ../perlasm/x86asm.pl
$(PERL) asm/aes-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
+aesni-x86.s: asm/aesni-x86.pl ../perlasm/x86asm.pl
+ $(PERL) asm/aesni-x86.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
aes-x86_64.s: asm/aes-x86_64.pl
$(PERL) asm/aes-x86_64.pl $(PERLASM_SCHEME) > $@
+aesni-x86_64.s: asm/aesni-x86_64.pl
+ $(PERL) asm/aesni-x86_64.pl $(PERLASM_SCHEME) > $@
aes-sparcv9.s: asm/aes-sparcv9.pl
$(PERL) asm/aes-sparcv9.pl $(CFLAGS) > $@
diff --git a/crypto/aes/asm/aesni-x86.pl b/crypto/aes/asm/aesni-x86.pl
new file mode 100644
index 0000000000..b3c8d1f60a
--- /dev/null
+++ b/crypto/aes/asm/aesni-x86.pl
@@ -0,0 +1,2187 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# This module implements support for Intel AES-NI extension. In
+# OpenSSL context it's used with Intel engine, but can also be used as
+# drop-in replacement for crypto/aes/asm/aes-586.pl [see below for
+# details].
+#
+# Performance.
+#
+# To start with see corresponding paragraph in aesni-x86_64.pl...
+# Instead of filling table similar to one found there I've chosen to
+# summarize *comparison* results for raw ECB, CTR and CBC benchmarks.
+# The simplified table below represents 32-bit performance relative
+# to 64-bit one in every given point. Ratios vary for different
+# encryption modes, therefore interval values.
+#
+# 16-byte 64-byte 256-byte 1-KB 8-KB
+# 53-67% 67-84% 91-94% 95-98% 97-99.5%
+#
+# Lower ratios for smaller block sizes are perfectly understandable,
+# because function call overhead is higher in 32-bit mode. Largest
+# 8-KB block performance is virtually same: 32-bit code is less than
+# 1% slower for ECB, CBC and CCM, and ~3% slower otherwise.
+
+# January 2011
+#
+# See aesni-x86_64.pl for details. Unlike x86_64 version this module
+# interleaves at most 6 aes[enc|dec] instructions, because there are
+# not enough registers for 8x interleave [which should be optimal for
+# Sandy Bridge]. Actually, performance results for 6x interleave
+# factor presented in aesni-x86_64.pl (except for CTR) are for this
+# module.
+
+# April 2011
+#
+# Add aesni_xts_[en|de]crypt. Westmere spends 1.50 cycles processing
+# one byte out of 8KB with 128-bit key, Sandy Bridge - 1.09.
+
+$PREFIX="aesni"; # if $PREFIX is set to "AES", the script
+ # generates drop-in replacement for
+ # crypto/aes/asm/aes-586.pl:-)
+$inline=1; # inline _aesni_[en|de]crypt
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+if ($PREFIX eq "aesni") { $movekey=*movups; }
+else { $movekey=*movups; }
+
+$len="eax";
+$rounds="ecx";
+$key="edx";
+$inp="esi";
+$out="edi";
+$rounds_="ebx"; # backup copy for $rounds
+$key_="ebp"; # backup copy for $key
+
+$rndkey0="xmm0";
+$rndkey1="xmm1";
+$inout0="xmm2";
+$inout1="xmm3";
+$inout2="xmm4";
+$inout3="xmm5"; $in1="xmm5";
+$inout4="xmm6"; $in0="xmm6";
+$inout5="xmm7"; $ivec="xmm7";
+
+# AESNI extenstion
+sub aeskeygenassist
+{ my($dst,$src,$imm)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+ { &data_byte(0x66,0x0f,0x3a,0xdf,0xc0|($1<<3)|$2,$imm); }
+}
+sub aescommon
+{ my($opcodelet,$dst,$src)=@_;
+ if ("$dst:$src" =~ /xmm([0-7]):xmm([0-7])/)
+ { &data_byte(0x66,0x0f,0x38,$opcodelet,0xc0|($1<<3)|$2);}
+}
+sub aesimc { aescommon(0xdb,@_); }
+sub aesenc { aescommon(0xdc,@_); }
+sub aesenclast { aescommon(0xdd,@_); }
+sub aesdec { aescommon(0xde,@_); }
+sub aesdeclast { aescommon(0xdf,@_); }
+
+# Inline version of internal aesni_[en|de]crypt1
+{ my $sn;
+sub aesni_inline_generate1
+{ my ($p,$inout,$ivec)=@_; $inout=$inout0 if (!defined($inout));
+ $sn++;
+
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &xorps ($ivec,$rndkey0) if (defined($ivec));
+ &lea ($key,&DWP(32,$key));
+ &xorps ($inout,$ivec) if (defined($ivec));
+ &xorps ($inout,$rndkey0) if (!defined($ivec));
+ &set_label("${p}1_loop_$sn");
+ eval"&aes${p} ($inout,$rndkey1)";
+ &dec ($rounds);
+ &$movekey ($rndkey1,&QWP(0,$key));
+ &lea ($key,&DWP(16,$key));
+ &jnz (&label("${p}1_loop_$sn"));
+ eval"&aes${p}last ($inout,$rndkey1)";
+}}
+
+sub aesni_generate1 # fully unrolled loop
+{ my ($p,$inout)=@_; $inout=$inout0 if (!defined($inout));
+
+ &function_begin_B("_aesni_${p}rypt1");
+ &movups ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(0x10,$key));
+ &xorps ($inout,$rndkey0);
+ &$movekey ($rndkey0,&QWP(0x20,$key));
+ &lea ($key,&DWP(0x30,$key));
+ &cmp ($rounds,11);
+ &jb (&label("${p}128"));
+ &lea ($key,&DWP(0x20,$key));
+ &je (&label("${p}192"));
+ &lea ($key,&DWP(0x20,$key));
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(-0x40,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(-0x30,$key));
+ &set_label("${p}192");
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(-0x20,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(-0x10,$key));
+ &set_label("${p}128");
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x10,$key));
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x20,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x30,$key));
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x40,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x50,$key));
+ eval"&aes${p} ($inout,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(0x60,$key));
+ eval"&aes${p} ($inout,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0x70,$key));
+ eval"&aes${p} ($inout,$rndkey1)";
+ eval"&aes${p}last ($inout,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt1");
+}
+
+# void $PREFIX_encrypt (const void *inp,void *out,const AES_KEY *key);
+&aesni_generate1("enc") if (!$inline);
+&function_begin_B("${PREFIX}_encrypt");
+ &mov ("eax",&wparam(0));
+ &mov ($key,&wparam(2));
+ &movups ($inout0,&QWP(0,"eax"));
+ &mov ($rounds,&DWP(240,$key));
+ &mov ("eax",&wparam(1));
+ if ($inline)
+ { &aesni_inline_generate1("enc"); }
+ else
+ { &call ("_aesni_encrypt1"); }
+ &movups (&QWP(0,"eax"),$inout0);
+ &ret ();
+&function_end_B("${PREFIX}_encrypt");
+
+# void $PREFIX_decrypt (const void *inp,void *out,const AES_KEY *key);
+&aesni_generate1("dec") if(!$inline);
+&function_begin_B("${PREFIX}_decrypt");
+ &mov ("eax",&wparam(0));
+ &mov ($key,&wparam(2));
+ &movups ($inout0,&QWP(0,"eax"));
+ &mov ($rounds,&DWP(240,$key));
+ &mov ("eax",&wparam(1));
+ if ($inline)
+ { &aesni_inline_generate1("dec"); }
+ else
+ { &call ("_aesni_decrypt1"); }
+ &movups (&QWP(0,"eax"),$inout0);
+ &ret ();
+&function_end_B("${PREFIX}_decrypt");
+
+# _aesni_[en|de]cryptN are private interfaces, N denotes interleave
+# factor. Why 3x subroutine were originally used in loops? Even though
+# aes[enc|dec] latency was originally 6, it could be scheduled only
+# every *2nd* cycle. Thus 3x interleave was the one providing optimal
+# utilization, i.e. when subroutine's throughput is virtually same as
+# of non-interleaved subroutine [for number of input blocks up to 3].
+# This is why it makes no sense to implement 2x subroutine.
+# aes[enc|dec] latency in next processor generation is 8, but the
+# instructions can be scheduled every cycle. Optimal interleave for
+# new processor is therefore 8x, but it's unfeasible to accommodate it
+# in XMM registers addreassable in 32-bit mode and therefore 6x is
+# used instead...
+
+sub aesni_generate3
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt3");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &shr ($rounds,1);
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &lea ($key,&DWP(32,$key));
+ &xorps ($inout0,$rndkey0);
+ &pxor ($inout1,$rndkey0);
+ &pxor ($inout2,$rndkey0);
+ &$movekey ($rndkey0,&QWP(0,$key));
+
+ &set_label("${p}3_loop");
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ &dec ($rounds);
+ eval"&aes${p} ($inout2,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(16,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ eval"&aes${p} ($inout1,$rndkey0)";
+ &lea ($key,&DWP(32,$key));
+ eval"&aes${p} ($inout2,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &jnz (&label("${p}3_loop"));
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p}last ($inout0,$rndkey0)";
+ eval"&aes${p}last ($inout1,$rndkey0)";
+ eval"&aes${p}last ($inout2,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt3");
+}
+
+# 4x interleave is implemented to improve small block performance,
+# most notably [and naturally] 4 block by ~30%. One can argue that one
+# should have implemented 5x as well, but improvement would be <20%,
+# so it's not worth it...
+sub aesni_generate4
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt4");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &shr ($rounds,1);
+ &lea ($key,&DWP(32,$key));
+ &xorps ($inout0,$rndkey0);
+ &pxor ($inout1,$rndkey0);
+ &pxor ($inout2,$rndkey0);
+ &pxor ($inout3,$rndkey0);
+ &$movekey ($rndkey0,&QWP(0,$key));
+
+ &set_label("${p}4_loop");
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ &dec ($rounds);
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p} ($inout3,$rndkey1)";
+ &$movekey ($rndkey1,&QWP(16,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ eval"&aes${p} ($inout1,$rndkey0)";
+ &lea ($key,&DWP(32,$key));
+ eval"&aes${p} ($inout2,$rndkey0)";
+ eval"&aes${p} ($inout3,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &jnz (&label("${p}4_loop"));
+
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p} ($inout3,$rndkey1)";
+ eval"&aes${p}last ($inout0,$rndkey0)";
+ eval"&aes${p}last ($inout1,$rndkey0)";
+ eval"&aes${p}last ($inout2,$rndkey0)";
+ eval"&aes${p}last ($inout3,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt4");
+}
+
+sub aesni_generate6
+{ my $p=shift;
+
+ &function_begin_B("_aesni_${p}rypt6");
+ &static_label("_aesni_${p}rypt6_enter");
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &shr ($rounds,1);
+ &$movekey ($rndkey1,&QWP(16,$key));
+ &lea ($key,&DWP(32,$key));
+ &xorps ($inout0,$rndkey0);
+ &pxor ($inout1,$rndkey0); # pxor does better here
+ eval"&aes${p} ($inout0,$rndkey1)";
+ &pxor ($inout2,$rndkey0);
+ eval"&aes${p} ($inout1,$rndkey1)";
+ &pxor ($inout3,$rndkey0);
+ &dec ($rounds);
+ eval"&aes${p} ($inout2,$rndkey1)";
+ &pxor ($inout4,$rndkey0);
+ eval"&aes${p} ($inout3,$rndkey1)";
+ &pxor ($inout5,$rndkey0);
+ eval"&aes${p} ($inout4,$rndkey1)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ eval"&aes${p} ($inout5,$rndkey1)";
+ &jmp (&label("_aesni_${p}rypt6_enter"));
+
+ &set_label("${p}6_loop",16);
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ &dec ($rounds);
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p} ($inout3,$rndkey1)";
+ eval"&aes${p} ($inout4,$rndkey1)";
+ eval"&aes${p} ($inout5,$rndkey1)";
+ &set_label("_aesni_${p}rypt6_enter",16);
+ &$movekey ($rndkey1,&QWP(16,$key));
+ eval"&aes${p} ($inout0,$rndkey0)";
+ eval"&aes${p} ($inout1,$rndkey0)";
+ &lea ($key,&DWP(32,$key));
+ eval"&aes${p} ($inout2,$rndkey0)";
+ eval"&aes${p} ($inout3,$rndkey0)";
+ eval"&aes${p} ($inout4,$rndkey0)";
+ eval"&aes${p} ($inout5,$rndkey0)";
+ &$movekey ($rndkey0,&QWP(0,$key));
+ &jnz (&label("${p}6_loop"));
+
+ eval"&aes${p} ($inout0,$rndkey1)";
+ eval"&aes${p} ($inout1,$rndkey1)";
+ eval"&aes${p} ($inout2,$rndkey1)";
+ eval"&aes${p} ($inout3,$rndkey1)";
+ eval"&aes${p} ($inout4,$rndkey1)";
+ eval"&aes${p} ($inout5,$rndkey1)";
+ eval"&aes${p}last ($inout0,$rndkey0)";
+ eval"&aes${p}last ($inout1,$rndkey0)";
+ eval"&aes${p}last ($inout2,$rndkey0)";
+ eval"&aes${p}last ($inout3,$rndkey0)";
+ eval"&aes${p}last ($inout4,$rndkey0)";
+ eval"&aes${p}last ($inout5,$rndkey0)";
+ &ret();
+ &function_end_B("_aesni_${p}rypt6");
+}
+&aesni_generate3("enc") if ($PREFIX eq "aesni");
+&aesni_generate3("dec");
+&aesni_generate4("enc") if ($PREFIX eq "aesni");
+&aesni_generate4("dec");
+&aesni_generate6("enc") if ($PREFIX eq "aesni");
+&aesni_generate6("dec");
+
+if ($PREFIX eq "aesni") {
+######################################################################
+# void aesni_ecb_encrypt (const void *in, void *out,
+# size_t length, const AES_KEY *key,
+# int enc);
+&function_begin("aesni_ecb_encrypt");
+ &mov ($inp,&wparam(0));
+ &mov ($out,&wparam(1));
+ &mov ($len,&wparam(2));
+ &mov ($key,&wparam(3));
+ &mov ($rounds_,&wparam(4));
+ &and ($len,-16);
+ &jz (&label("ecb_ret"));
+ &mov ($rounds,&DWP(240,$key));
+ &test ($rounds_,$rounds_);
+ &jz (&label("ecb_decrypt"));
+
+ &mov ($key_,$key); # backup $key
+ &mov ($rounds_,$rounds); # backup $rounds
+ &cmp ($len,0x60);
+ &jb (&label("ecb_enc_tail"));
+
+ &movdqu ($inout0,&QWP(0,$inp));
+ &movdqu ($inout1,&QWP(0x10,$inp));
+ &movdqu ($inout2,&QWP(0x20,$inp));
+ &movdqu ($inout3,&QWP(0x30,$inp));
+ &movdqu ($inout4,&QWP(0x40,$inp));
+ &movdqu ($inout5,&QWP(0x50,$inp));
+ &lea ($inp,&DWP(0x60,$inp));
+ &sub ($len,0x60);
+ &jmp (&label("ecb_enc_loop6_enter"));
+
+&set_label("ecb_enc_loop6",16);
+ &movups (&QWP(0,$out),$inout0);
+ &movdqu ($inout0,&QWP(0,$inp));
+ &movups (&QWP(0x10,$out),$inout1);
+ &movdqu ($inout1,&QWP(0x10,$inp));
+ &movups (&QWP(0x20,$out),$inout2);
+ &movdqu ($inout2,&QWP(0x20,$inp));
+ &movups (&QWP(0x30,$out),$inout3);
+ &movdqu ($inout3,&QWP(0x30,$inp));
+ &movups (&QWP(0x40,$out),$inout4);
+ &movdqu ($inout4,&QWP(0x40,$inp));
+ &movups (&QWP(0x50,$out),$inout5);
+ &lea ($out,&DWP(0x60,$out));
+ &movdqu ($inout5,&QWP(0x50,$inp));
+ &lea ($inp,&DWP(0x60,$inp));
+&set_label("ecb_enc_loop6_enter");
+
+ &call ("_aesni_encrypt6"