summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGES4
-rwxr-xr-xConfigure14
-rw-r--r--TABLE132
-rwxr-xr-xconfig10
-rw-r--r--crypto/aes/asm/aes-mips.pl811
-rw-r--r--crypto/sha/asm/sha1-mips.pl96
-rw-r--r--crypto/sha/asm/sha512-mips.pl79
7 files changed, 984 insertions, 162 deletions
diff --git a/CHANGES b/CHANGES
index 3c61e39919..f835089fa4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -4,6 +4,10 @@
Changes between 1.0.1 and 1.0.2 [xx XXX xxxx]
+ *) MIPS assembly pack updates: support for MIPS32r2 and SmartMIPS ASE,
+ platform support for Linux and Android.
+ [Andy Polyakov]
+
*) Call OCSP Stapling callback after ciphersuite has been chosen, so
the right response is stapled. Also change current certificate to
the certificate actually sent.
diff --git a/Configure b/Configure
index e6f799465c..2d5e25f59f 100755
--- a/Configure
+++ b/Configure
@@ -348,6 +348,13 @@ my %table=(
# It's believed that majority of ARM toolchains predefine appropriate -march.
# If you compiler does not, do complement config command line with one!
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+# Configure script adds minimally required -mips2/-mips3 for assembly
+# support, if no -mips*/-march= was specified at command line. mips32 and
+# mips64 below refer to contemporary MIPS Architecture specifications,
+# MIPS32 and MIPS64, rather than to kernel bitness.
+"linux-mips32", "gcc:-mabi=32 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"linux-mips64", "gcc:-mabi=n32 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:n32:dlfcn:linux-shared:-fPIC:-mabi=n32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::32",
+"linux64-mips64", "gcc:-mabi=64 -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips64_asm}:64:dlfcn:linux-shared:-fPIC:-mabi=64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64",
#### IA-32 targets...
"linux-ia32-icc", "icc:-DL_ENDIAN -DTERMIO -O2 -no_cpprt::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-KPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-elf", "gcc:-DL_ENDIAN -DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -406,6 +413,7 @@ my %table=(
"android","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"android-x86","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:".eval{my $asm=${x86_elf_asm};$asm=~s/:elf/:android/;$asm}.":dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"android-armv7","gcc:-march=armv7-a -mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
+"android-mips","gcc:-mandroid -I\$(ANDROID_DEV)/include -B\$(ANDROID_DEV)/lib -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${mips32_asm}:o32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
#### *BSD [do see comment about ${BSDthreads} above!]
"BSD-generic32","gcc:-DTERMIOS -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
@@ -1202,6 +1210,12 @@ if ($target =~ /^mingw/ && `$cc --target-help 2>&1` !~ m/\-mno\-cygwin/m)
$shared_ldflag =~ s/\-mno\-cygwin\s*//;
}
+if ($target =~ /linux.*\-mips/ && !$no_asm && $flags !~ /\-m(ips|arch=)/) {
+ # minimally required architecture flags for assembly modules
+ $cflags="-mips2 $cflags" if ($target =~ /mips32/);
+ $cflags="-mips3 $cflags" if ($target =~ /mips64/);
+}
+
my $no_shared_warn=0;
my $no_user_cflags=0;
diff --git a/TABLE b/TABLE
index ca6591890e..33e7eb48d7 100644
--- a/TABLE
+++ b/TABLE
@@ -1089,6 +1089,39 @@ $ranlib =
$arflags =
$multilib =
+*** android-mips
+$cc = gcc
+$cflags = -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj = bn-mips.o mips-mont.o
+$des_obj =
+$aes_obj = aes_cbc.o aes-mips.o
+$bf_obj =
+$md5_obj =
+$sha1_obj = sha1-mips.o sha256-mips.o
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$modes_obj =
+$engines_obj =
+$perlasm_scheme = o32
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag =
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib =
+
*** android-x86
$cc = gcc
$cflags = -mandroid -I$(ANDROID_DEV)/include -B$(ANDROID_DEV)/lib -O3 -fomit-frame-pointer -Wall
@@ -4191,6 +4224,72 @@ $ranlib =
$arflags =
$multilib =
+*** linux-mips32
+$cc = gcc
+$cflags = -mabi=32 -DTERMIO -O3 -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj = bn-mips.o mips-mont.o
+$des_obj =
+$aes_obj = aes_cbc.o aes-mips.o
+$bf_obj =
+$md5_obj =
+$sha1_obj = sha1-mips.o sha256-mips.o
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$modes_obj =
+$engines_obj =
+$perlasm_scheme = o32
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag =
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib =
+
+*** linux-mips64
+$cc = gcc
+$cflags = -mabi=n32 -DTERMIO -O3 -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj = bn-mips.o mips-mont.o
+$des_obj =
+$aes_obj = aes_cbc.o aes-mips.o
+$bf_obj =
+$md5_obj =
+$sha1_obj = sha1-mips.o sha256-mips.o sha512-mips.o
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$modes_obj =
+$engines_obj =
+$perlasm_scheme = n32
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag = -mabi=n32
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib = 32
+
*** linux-ppc
$cc = gcc
$cflags = -DB_ENDIAN -DTERMIO -O3 -Wall
@@ -4422,6 +4521,39 @@ $ranlib =
$arflags =
$multilib = /highgprs
+*** linux64-mips64
+$cc = gcc
+$cflags = -mabi=64 -DTERMIO -O3 -Wall
+$unistd =
+$thread_cflag = -D_REENTRANT
+$sys_id =
+$lflags = -ldl
+$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR
+$cpuid_obj =
+$bn_obj = bn-mips.o mips-mont.o
+$des_obj =
+$aes_obj = aes_cbc.o aes-mips.o
+$bf_obj =
+$md5_obj =
+$sha1_obj = sha1-mips.o sha256-mips.o sha512-mips.o
+$cast_obj =
+$rc4_obj =
+$rmd160_obj =
+$rc5_obj =
+$wp_obj =
+$cmll_obj =
+$modes_obj =
+$engines_obj =
+$perlasm_scheme = 64
+$dso_scheme = dlfcn
+$shared_target= linux-shared
+$shared_cflag = -fPIC
+$shared_ldflag = -mabi=64
+$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
+$ranlib =
+$arflags =
+$multilib = 64
+
*** linux64-s390x
$cc = gcc
$cflags = -m64 -DB_ENDIAN -DTERMIO -O3 -Wall
diff --git a/config b/config
index 88b9bc69da..b7344d18e5 100755
--- a/config
+++ b/config
@@ -596,6 +596,16 @@ case "$GUESSOS" in
OUT="linux-ppc"
;;
ppc-*-linux2) OUT="linux-ppc" ;;
+ mips64*-*-linux2)
+ echo "WARNING! If you wish to build 64-bit library, then you have to"
+ echo " invoke './Configure linux64-mips64' *manually*."
+ if [ "$TEST" = "false" -a -t 1 ]; then
+ echo " You have about 5 seconds to press Ctrl-C to abort."
+ (trap "stty `stty -g`" 2 0; stty -icanon min 0 time 50; read waste) <&1
+ fi
+ OUT="linux-mips64"
+ ;;
+ mips*-*-linux2) OUT="linux-mips32" ;;
ppc60x-*-vxworks*) OUT="vxworks-ppc60x" ;;
ppcgen-*-vxworks*) OUT="vxworks-ppcgen" ;;
pentium-*-vxworks*) OUT="vxworks-pentium" ;;
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
index 07ac70e77b..1fdc6bf85c 100644
--- a/crypto/aes/asm/aes-mips.pl
+++ b/crypto/aes/asm/aes-mips.pl
@@ -20,6 +20,13 @@
# thing about this module is its endian neutrality, which means that
# it processes data without ever changing byte order...
+# September 2012
+#
+# Add MIPS32R2 (~10% fewer instructions) and SmartMIPS ASE (further
+# ~25% fewer instructions) code. Note that there is no run-time switch;
+# instead, the code path is chosen at preprocessing time: pass -mips32r2
+# and/or -msmartmips.
+
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
@@ -52,6 +59,7 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32
+ $PTR_INS="dins";
$REG_S="sd";
$REG_L="ld";
$PTR_SLL="dsll"; # incidentally works even on n32
@@ -59,6 +67,7 @@ if ($flavour =~ /64|n32/i) {
} else {
$PTR_ADD="add";
$PTR_SUB="sub";
+ $PTR_INS="ins";
$REG_S="sw";
$REG_L="lw";
$PTR_SLL="sll";
@@ -89,7 +98,11 @@ $code.=<<___;
# include <openssl/fipssyms.h>
#endif
-#if !defined(__vxworks) || defined(__pic__)
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif
.set noat
@@ -125,6 +138,89 @@ _mips_AES_encrypt:
xor $s3,$t3
sub $cnt,1
+#if defined(__mips_smartmips)
+ ext $i0,$s1,16,8
+.Loop_enc:
+ ext $i1,$s2,16,8
+ ext $i2,$s3,16,8
+ ext $i3,$s0,16,8
+ lwxs $t0,$i0($Tbl) # Te1[s1>>16]
+ ext $i0,$s2,8,8
+ lwxs $t1,$i1($Tbl) # Te1[s2>>16]
+ ext $i1,$s3,8,8
+ lwxs $t2,$i2($Tbl) # Te1[s3>>16]
+ ext $i2,$s0,8,8
+ lwxs $t3,$i3($Tbl) # Te1[s0>>16]
+ ext $i3,$s1,8,8
+
+ lwxs $t4,$i0($Tbl) # Te2[s2>>8]
+ ext $i0,$s3,0,8
+ lwxs $t5,$i1($Tbl) # Te2[s3>>8]
+ ext $i1,$s0,0,8
+ lwxs $t6,$i2($Tbl) # Te2[s0>>8]
+ ext $i2,$s1,0,8
+ lwxs $t7,$i3($Tbl) # Te2[s1>>8]
+ ext $i3,$s2,0,8
+
+ lwxs $t8,$i0($Tbl) # Te3[s3]
+ ext $i0,$s0,24,8
+ lwxs $t9,$i1($Tbl) # Te3[s0]
+ ext $i1,$s1,24,8
+ lwxs $t10,$i2($Tbl) # Te3[s1]
+ ext $i2,$s2,24,8
+ lwxs $t11,$i3($Tbl) # Te3[s2]
+ ext $i3,$s3,24,8
+
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ xor $t0,$t4
+ lwxs $t4,$i0($Tbl) # Te0[s0>>24]
+ xor $t1,$t5
+ lwxs $t5,$i1($Tbl) # Te0[s1>>24]
+ xor $t2,$t6
+ lwxs $t6,$i2($Tbl) # Te0[s2>>24]
+ xor $t3,$t7
+ lwxs $t7,$i3($Tbl) # Te0[s3>>24]
+
+ rotr $t8,$t8,24
+ lw $s0,0($key0)
+ rotr $t9,$t9,24
+ lw $s1,4($key0)
+ rotr $t10,$t10,24
+ lw $s2,8($key0)
+ rotr $t11,$t11,24
+ lw $s3,12($key0)
+
+ xor $t0,$t8
+ xor $t1,$t9
+ xor $t2,$t10
+ xor $t3,$t11
+
+ xor $t0,$t4
+ xor $t1,$t5
+ xor $t2,$t6
+ xor $t3,$t7
+
+ sub $cnt,1
+ $PTR_ADD $key0,16
+ xor $s0,$t0
+ xor $s1,$t1
+ xor $s2,$t2
+ xor $s3,$t3
+ .set noreorder
+ bnez $cnt,.Loop_enc
+ ext $i0,$s1,16,8
+
+ _xtr $i0,$s1,16-2
+#else
_xtr $i0,$s1,16-2
.Loop_enc:
_xtr $i1,$s2,16-2
@@ -138,19 +234,29 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ lw $t0,0($i0) # Te1[s1>>16]
+ _xtr $i0,$s2,8-2
+ lw $t1,0($i1) # Te1[s2>>16]
+ _xtr $i1,$s3,8-2
+ lw $t2,0($i2) # Te1[s3>>16]
+ _xtr $i2,$s0,8-2
+ lw $t3,0($i3) # Te1[s0>>16]
+ _xtr $i3,$s1,8-2
+#else
lwl $t0,3($i0) # Te1[s1>>16]
lwl $t1,3($i1) # Te1[s2>>16]
lwl $t2,3($i2) # Te1[s3>>16]
lwl $t3,3($i3) # Te1[s0>>16]
lwr $t0,2($i0) # Te1[s1>>16]
- lwr $t1,2($i1) # Te1[s2>>16]
- lwr $t2,2($i2) # Te1[s3>>16]
- lwr $t3,2($i3) # Te1[s0>>16]
-
_xtr $i0,$s2,8-2
+ lwr $t1,2($i1) # Te1[s2>>16]
_xtr $i1,$s3,8-2
+ lwr $t2,2($i2) # Te1[s3>>16]
_xtr $i2,$s0,8-2
+ lwr $t3,2($i3) # Te1[s0>>16]
_xtr $i3,$s1,8-2
+#endif
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -159,19 +265,88 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+# if defined(_MIPSEL)
+ lw $t4,0($i0) # Te2[s2>>8]
+ _xtr $i0,$s3,0-2
+ lw $t5,0($i1) # Te2[s3>>8]
+ _xtr $i1,$s0,0-2
+ lw $t6,0($i2) # Te2[s0>>8]
+ _xtr $i2,$s1,0-2
+ lw $t7,0($i3) # Te2[s1>>8]
+ _xtr $i3,$s2,0-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lw $t8,0($i0) # Te3[s3]
+ $PTR_INS $i0,$s0,2,8
+ lw $t9,0($i1) # Te3[s0]
+ $PTR_INS $i1,$s1,2,8
+ lw $t10,0($i2) # Te3[s1]
+ $PTR_INS $i2,$s2,2,8
+ lw $t11,0($i3) # Te3[s2]
+ $PTR_INS $i3,$s3,2,8
+# else
+ lw $t4,0($i0) # Te2[s2>>8]
+ $PTR_INS $i0,$s3,2,8
+ lw $t5,0($i1) # Te2[s3>>8]
+ $PTR_INS $i1,$s0,2,8
+ lw $t6,0($i2) # Te2[s0>>8]
+ $PTR_INS $i2,$s1,2,8
+ lw $t7,0($i3) # Te2[s1>>8]
+ $PTR_INS $i3,$s2,2,8
+
+ lw $t8,0($i0) # Te3[s3]
+ _xtr $i0,$s0,24-2
+ lw $t9,0($i1) # Te3[s0]
+ _xtr $i1,$s1,24-2
+ lw $t10,0($i2) # Te3[s1]
+ _xtr $i2,$s2,24-2
+ lw $t11,0($i3) # Te3[s2]
+ _xtr $i3,$s3,24-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+# endif
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ rotr $t8,$t8,24
+ rotr $t9,$t9,24
+ rotr $t10,$t10,24
+ rotr $t11,$t11,24
+#else
lwl $t4,2($i0) # Te2[s2>>8]
lwl $t5,2($i1) # Te2[s3>>8]
lwl $t6,2($i2) # Te2[s0>>8]
lwl $t7,2($i3) # Te2[s1>>8]
lwr $t4,1($i0) # Te2[s2>>8]
- lwr $t5,1($i1) # Te2[s3>>8]
- lwr $t6,1($i2) # Te2[s0>>8]
- lwr $t7,1($i3) # Te2[s1>>8]
-
_xtr $i0,$s3,0-2
+ lwr $t5,1($i1) # Te2[s3>>8]
_xtr $i1,$s0,0-2
+ lwr $t6,1($i2) # Te2[s0>>8]
_xtr $i2,$s1,0-2
+ lwr $t7,1($i3) # Te2[s1>>8]
_xtr $i3,$s2,0-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -185,14 +360,14 @@ _mips_AES_encrypt:
lwl $t10,1($i2) # Te3[s1]
lwl $t11,1($i3) # Te3[s2]
lwr $t8,0($i0) # Te3[s3]
- lwr $t9,0($i1) # Te3[s0]
- lwr $t10,0($i2) # Te3[s1]
- lwr $t11,0($i3) # Te3[s2]
-
_xtr $i0,$s0,24-2
+ lwr $t9,0($i1) # Te3[s0]
_xtr $i1,$s1,24-2
+ lwr $t10,0($i2) # Te3[s1]
_xtr $i2,$s2,24-2
+ lwr $t11,0($i3) # Te3[s2]
_xtr $i3,$s3,24-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -201,24 +376,24 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#endif
xor $t0,$t4
- xor $t1,$t5
- xor $t2,$t6
- xor $t3,$t7
lw $t4,0($i0) # Te0[s0>>24]
+ xor $t1,$t5
lw $t5,0($i1) # Te0[s1>>24]
+ xor $t2,$t6
lw $t6,0($i2) # Te0[s2>>24]
+ xor $t3,$t7
lw $t7,0($i3) # Te0[s3>>24]
- lw $s0,0($key0)
- lw $s1,4($key0)
- lw $s2,8($key0)
- lw $s3,12($key0)
-
xor $t0,$t8
+ lw $s0,0($key0)
xor $t1,$t9
+ lw $s1,4($key0)
xor $t2,$t10
+ lw $s2,8($key0)
xor $t3,$t11
+ lw $s3,12($key0)
xor $t0,$t4
xor $t1,$t5
@@ -234,6 +409,7 @@ _mips_AES_encrypt:
.set noreorder
bnez $cnt,.Loop_enc
_xtr $i0,$s1,16-2
+#endif
.set reorder
_xtr $i1,$s2,16-2
@@ -248,14 +424,14 @@ _mips_AES_encrypt:
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t0,2($i0) # Te4[s1>>16]
- lbu $t1,2($i1) # Te4[s2>>16]
- lbu $t2,2($i2) # Te4[s3>>16]
- lbu $t3,2($i3) # Te4[s0>>16]
-
_xtr $i0,$s2,8-2
+ lbu $t1,2($i1) # Te4[s2>>16]
_xtr $i1,$s3,8-2
+ lbu $t2,2($i2) # Te4[s3>>16]
_xtr $i2,$s0,8-2
+ lbu $t3,2($i3) # Te4[s0>>16]
_xtr $i3,$s1,8-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -264,15 +440,44 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
lbu $t4,2($i0) # Te4[s2>>8]
+ $PTR_INS $i0,$s0,2,8
lbu $t5,2($i1) # Te4[s3>>8]
+ $PTR_INS $i1,$s1,2,8
lbu $t6,2($i2) # Te4[s0>>8]
+ $PTR_INS $i2,$s2,2,8
lbu $t7,2($i3) # Te4[s1>>8]
+ $PTR_INS $i3,$s3,2,8
+ lbu $t8,2($i0) # Te4[s0>>24]
+ _xtr $i0,$s3,0-2
+ lbu $t9,2($i1) # Te4[s1>>24]
+ _xtr $i1,$s0,0-2
+ lbu $t10,2($i2) # Te4[s2>>24]
+ _xtr $i2,$s1,0-2
+ lbu $t11,2($i3) # Te4[s3>>24]
+ _xtr $i3,$s2,0-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+# else
+ lbu $t4,2($i0) # Te4[s2>>8]
_xtr $i0,$s0,24-2
+ lbu $t5,2($i1) # Te4[s3>>8]
_xtr $i1,$s1,24-2
+ lbu $t6,2($i2) # Te4[s0>>8]
_xtr $i2,$s2,24-2
+ lbu $t7,2($i3) # Te4[s1>>8]
_xtr $i3,$s3,24-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -282,18 +487,76 @@ _mips_AES_encrypt:
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,2($i0) # Te4[s0>>24]
+ $PTR_INS $i0,$s3,2,8
lbu $t9,2($i1) # Te4[s1>>24]
+ $PTR_INS $i1,$s0,2,8
lbu $t10,2($i2) # Te4[s2>>24]
+ $PTR_INS $i2,$s1,2,8
lbu $t11,2($i3) # Te4[s3>>24]
+ $PTR_INS $i3,$s2,2,8
+# endif
+ _ins $t0,16
+ _ins $t1,16
+ _ins $t2,16
+ _ins $t3,16
+ _ins2 $t0,$t4,8
+ lbu $t4,2($i0) # Te4[s3]
+ _ins2 $t1,$t5,8
+ lbu $t5,2($i1) # Te4[s0]
+ _ins2 $t2,$t6,8
+ lbu $t6,2($i2) # Te4[s1]
+ _ins2 $t3,$t7,8
+ lbu $t7,2($i3) # Te4[s2]
+
+ _ins2 $t0,$t8,24
+ lw $s0,0($key0)
+ _ins2 $t1,$t9,24
+ lw $s1,4($key0)
+ _ins2 $t2,$t10,24
+ lw $s2,8($key0)
+ _ins2 $t3,$t11,24
+ lw $s3,12($key0)
+
+ _ins2 $t0,$t4,0
+ _ins2 $t1,$t5,0
+ _ins2 $t2,$t6,0
+ _ins2 $t3,$t7,0
+#else
+ lbu $t4,2($i0) # Te4[s2>>8]
+ _xtr $i0,$s0,24-2
+ lbu $t5,2($i1) # Te4[s3>>8]
+ _xtr $i1,$s1,24-2
+ lbu $t6,2($i2) # Te4[s0>>8]
+ _xtr $i2,$s2,24-2
+ lbu $t7,2($i3) # Te4[s1>>8]
+ _xtr $i3,$s3,24-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lbu $t8,2($i0) # Te4[s0>>24]
_xtr $i0,$s3,0-2
+ lbu $t9,2($i1) # Te4[s1>>24]
_xtr $i1,$s0,0-2
+ lbu $t10,2($i2) # Te4[s2>>24]
_xtr $i2,$s1,0-2
+ lbu $t11,2($i3) # Te4[s3>>24]
_xtr $i3,$s2,0-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
_ins $t0,16
_ins $t1,16
@@ -306,27 +569,21 @@ _mips_AES_encrypt:
_ins $t7,8
xor $t0,$t4
- xor $t1,$t5
- xor $t2,$t6
- xor $t3,$t7
-
- $PTR_ADD $i0,$Tbl
- $PTR_ADD $i1,$Tbl
- $PTR_ADD $i2,$Tbl
- $PTR_ADD $i3,$Tbl
lbu $t4,2($i0) # Te4[s3]
+ xor $t1,$t5
lbu $t5,2($i1) # Te4[s0]
+ xor $t2,$t6
lbu $t6,2($i2) # Te4[s1]
+ xor $t3,$t7
lbu $t7,2($i3) # Te4[s2]
_ins $t8,24
- _ins $t9,24
- _ins $t10,24
- _ins $t11,24
-
lw $s0,0($key0)
+ _ins $t9,24
lw $s1,4($key0)
+ _ins $t10,24
lw $s2,8($key0)
+ _ins $t11,24
lw $s3,12($key0)
xor $t0,$t8
@@ -343,7 +600,7 @@ _mips_AES_encrypt:
xor $t1,$t5
xor $t2,$t6
xor $t3,$t7
-
+#endif
xor $s0,$t0
xor $s1,$t1
xor $s2,$t2
@@ -455,6 +712,89 @@ _mips_AES_decrypt:
xor $s3,$t3
sub $cnt,1
+#if defined(__mips_smartmips)
+ ext $i0,$s3,16,8
+.Loop_dec:
+ ext $i1,$s0,16,8
+ ext $i2,$s1,16,8
+ ext $i3,$s2,16,8
+ lwxs $t0,$i0($Tbl) # Td1[s3>>16]
+ ext $i0,$s2,8,8
+ lwxs $t1,$i1($Tbl) # Td1[s0>>16]
+ ext $i1,$s3,8,8
+ lwxs $t2,$i2($Tbl) # Td1[s1>>16]
+ ext $i2,$s0,8,8
+ lwxs $t3,$i3($Tbl) # Td1[s2>>16]
+ ext $i3,$s1,8,8
+
+ lwxs $t4,$i0($Tbl) # Td2[s2>>8]
+ ext $i0,$s1,0,8
+ lwxs $t5,$i1($Tbl) # Td2[s3>>8]
+ ext $i1,$s2,0,8
+ lwxs $t6,$i2($Tbl) # Td2[s0>>8]
+ ext $i2,$s3,0,8
+ lwxs $t7,$i3($Tbl) # Td2[s1>>8]
+ ext $i3,$s0,0,8
+
+ lwxs $t8,$i0($Tbl) # Td3[s1]
+ ext $i0,$s0,24,8
+ lwxs $t9,$i1($Tbl) # Td3[s2]
+ ext $i1,$s1,24,8
+ lwxs $t10,$i2($Tbl) # Td3[s3]
+ ext $i2,$s2,24,8
+ lwxs $t11,$i3($Tbl) # Td3[s0]
+ ext $i3,$s3,24,8
+
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ xor $t0,$t4
+ lwxs $t4,$i0($Tbl) # Td0[s0>>24]
+ xor $t1,$t5
+ lwxs $t5,$i1($Tbl) # Td0[s1>>24]
+ xor $t2,$t6
+ lwxs $t6,$i2($Tbl) # Td0[s2>>24]
+ xor $t3,$t7
+ lwxs $t7,$i3($Tbl) # Td0[s3>>24]
+
+ rotr $t8,$t8,24
+ lw $s0,0($key0)
+ rotr $t9,$t9,24
+ lw $s1,4($key0)
+ rotr $t10,$t10,24
+ lw $s2,8($key0)
+ rotr $t11,$t11,24
+ lw $s3,12($key0)
+
+ xor $t0,$t8
+ xor $t1,$t9
+ xor $t2,$t10
+ xor $t3,$t11
+
+ xor $t0,$t4
+ xor $t1,$t5
+ xor $t2,$t6
+ xor $t3,$t7
+
+ sub $cnt,1
+ $PTR_ADD $key0,16
+ xor $s0,$t0
+ xor $s1,$t1
+ xor $s2,$t2
+ xor $s3,$t3
+ .set noreorder
+ bnez $cnt,.Loop_dec
+ ext $i0,$s3,16,8
+
+ _xtr $i0,$s3,16-2
+#else
_xtr $i0,$s3,16-2
.Loop_dec:
_xtr $i1,$s0,16-2
@@ -468,19 +808,88 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ lw $t0,0($i0) # Td1[s3>>16]
+ _xtr $i0,$s2,8-2
+ lw $t1,0($i1) # Td1[s0>>16]
+ _xtr $i1,$s3,8-2
+ lw $t2,0($i2) # Td1[s1>>16]
+ _xtr $i2,$s0,8-2
+ lw $t3,0($i3) # Td1[s2>>16]
+ _xtr $i3,$s1,8-2
+#else
lwl $t0,3($i0) # Td1[s3>>16]
lwl $t1,3($i1) # Td1[s0>>16]
lwl $t2,3($i2) # Td1[s1>>16]
lwl $t3,3($i3) # Td1[s2>>16]
lwr $t0,2($i0) # Td1[s3>>16]
- lwr $t1,2($i1) # Td1[s0>>16]
- lwr $t2,2($i2) # Td1[s1>>16]
- lwr $t3,2($i3) # Td1[s2>>16]
-
_xtr $i0,$s2,8-2
+ lwr $t1,2($i1) # Td1[s0>>16]
_xtr $i1,$s3,8-2
+ lwr $t2,2($i2) # Td1[s1>>16]
_xtr $i2,$s0,8-2
+ lwr $t3,2($i3) # Td1[s2>>16]
_xtr $i3,$s1,8-2
+#endif
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+# if defined(_MIPSEL)
+ lw $t4,0($i0) # Td2[s2>>8]
+ _xtr $i0,$s1,0-2
+ lw $t5,0($i1) # Td2[s3>>8]
+ _xtr $i1,$s2,0-2
+ lw $t6,0($i2) # Td2[s0>>8]
+ _xtr $i2,$s3,0-2
+ lw $t7,0($i3) # Td2[s1>>8]
+ _xtr $i3,$s0,0-2
+
+ and $i0,0x3fc
+ and $i1,0x3fc
+ and $i2,0x3fc
+ and $i3,0x3fc
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lw $t8,0($i0) # Td3[s1]
+ $PTR_INS $i0,$s0,2,8
+ lw $t9,0($i1) # Td3[s2]
+ $PTR_INS $i1,$s1,2,8
+ lw $t10,0($i2) # Td3[s3]
+ $PTR_INS $i2,$s2,2,8
+ lw $t11,0($i3) # Td3[s0]
+ $PTR_INS $i3,$s3,2,8
+#else
+ lw $t4,0($i0) # Td2[s2>>8]
+ $PTR_INS $i0,$s1,2,8
+ lw $t5,0($i1) # Td2[s3>>8]
+ $PTR_INS $i1,$s2,2,8
+ lw $t6,0($i2) # Td2[s0>>8]
+ $PTR_INS $i2,$s3,2,8
+ lw $t7,0($i3) # Td2[s1>>8]
+ $PTR_INS $i3,$s0,2,8
+
+ lw $t8,0($i0) # Td3[s1]
+ _xtr $i0,$s0,24-2
+ lw $t9,0($i1) # Td3[s2]
+ _xtr $i1,$s1,24-2
+ lw $t10,0($i2) # Td3[s3]
+ _xtr $i2,$s2,24-2
+ lw $t11,0($i3) # Td3[s0]
+ _xtr $i3,$s3,24-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -489,19 +898,30 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#endif
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ rotr $t8,$t8,24
+ rotr $t9,$t9,24
+ rotr $t10,$t10,24
+ rotr $t11,$t11,24
+#else
lwl $t4,2($i0) # Td2[s2>>8]
lwl $t5,2($i1) # Td2[s3>>8]
lwl $t6,2($i2) # Td2[s0>>8]
lwl $t7,2($i3) # Td2[s1>>8]
lwr $t4,1($i0) # Td2[s2>>8]
- lwr $t5,1($i1) # Td2[s3>>8]
- lwr $t6,1($i2) # Td2[s0>>8]
- lwr $t7,1($i3) # Td2[s1>>8]
-
_xtr $i0,$s1,0-2
+ lwr $t5,1($i1) # Td2[s3>>8]
_xtr $i1,$s2,0-2
+ lwr $t6,1($i2) # Td2[s0>>8]
_xtr $i2,$s3,0-2
+ lwr $t7,1($i3) # Td2[s1>>8]
_xtr $i3,$s0,0-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -515,14 +935,14 @@ _mips_AES_decrypt:
lwl $t10,1($i2) # Td3[s3]
lwl $t11,1($i3) # Td3[s0]
lwr $t8,0($i0) # Td3[s1]
- lwr $t9,0($i1) # Td3[s2]
- lwr $t10,0($i2) # Td3[s3]
- lwr $t11,0($i3) # Td3[s0]
-
_xtr $i0,$s0,24-2
+ lwr $t9,0($i1) # Td3[s2]
_xtr $i1,$s1,24-2
+ lwr $t10,0($i2) # Td3[s3]
_xtr $i2,$s2,24-2
+ lwr $t11,0($i3) # Td3[s0]
_xtr $i3,$s3,24-2
+
and $i0,0x3fc
and $i1,0x3fc
and $i2,0x3fc
@@ -531,27 +951,25 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#endif
xor $t0,$t4
- xor $t1,$t5
- xor $t2,$t6
- xor $t3,$t7
-
-
lw $t4,0($i0) # Td0[s0>>24]
+ xor $t1,$t5
lw $t5,0($i1) # Td0[s1>>24]
+ xor $t2,$t6
lw $t6,0($i2) # Td0[s2>>24]
+ xor $t3,$t7
lw $t7,0($i3) # Td0[s3>>24]
- lw $s0,0($key0)
- lw $s1,4($key0)
- lw $s2,8($key0)
- lw $s3,12($key0)
-
xor $t0,$t8
+ lw $s0,0($key0)
xor $t1,$t9
+ lw $s1,4($key0)
xor $t2,$t10
+ lw $s2,8($key0)
xor $t3,$t11
+ lw $s3,12($key0)
xor $t0,$t4
xor $t1,$t5
@@ -567,38 +985,39 @@ _mips_AES_decrypt:
.set noreorder
bnez $cnt,.Loop_dec
_xtr $i0,$s3,16-2
+#endif
.set reorder
lw $t4,1024($Tbl) # prefetch Td4
- lw $t5,1024+32($Tbl)
- lw $t6,1024+64($Tbl)
- lw $t7,1024+96($Tbl)
- lw $t8,1024+128($Tbl)
- lw $t9,1024+160($Tbl)
- lw $t10,1024+192($Tbl)
- lw $t11,1024+224($Tbl)
-
_xtr $i0,$s3,16
+ lw $t5,1024+32($Tbl)
_xtr $i1,$s0,16
+ lw $t6,1024+64($Tbl)
_xtr $i2,$s1,16
+ lw $t7,1024+96($Tbl)
_xtr $i3,$s2,16
+ lw $t8,1024+128($Tbl)
and $i0,0xff
+ lw $t9,1024+160($Tbl)
and $i1,0xff
+ lw $t10,1024+192($Tbl)
and $i2,0xff
+ lw $t11,1024+224($Tbl)
and $i3,0xff
+
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t0,1024($i0) # Td4[s3>>16]
- lbu $t1,1024($i1) # Td4[s0>>16]
- lbu $t2,1024($i2) # Td4[s1>>16]
- lbu $t3,1024($i3) # Td4[s2>>16]
-
_xtr $i0,$s2,8
+ lbu $t1,1024($i1) # Td4[s0>>16]
_xtr $i1,$s3,8
+ lbu $t2,1024($i2) # Td4[s1>>16]
_xtr $i2,$s0,8
+ lbu $t3,1024($i3) # Td4[s2>>16]
_xtr $i3,$s1,8
+
and $i0,0xff
and $i1,0xff
and $i2,0xff
@@ -607,29 +1026,108 @@ _mips_AES_decrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+# if defined(_MIPSEL)
lbu $t4,1024($i0) # Td4[s2>>8]
+ $PTR_INS $i0,$s0,0,8
lbu $t5,1024($i1) # Td4[s3>>8]
+ $PTR_INS $i1,$s1,0,8
lbu $t6,1024($i2) # Td4[s0>>8]
+ $PTR_INS $i2,$s2,0,8
lbu $t7,1024($i3) # Td4[s1>>8]
+ $PTR_INS $i3,$s3,0,8
+ lbu $t8,1024($i0) # Td4[s0>>24]
+ _xtr $i0,$s1,0
+ lbu $t9,1024($i1) # Td4[s1>>24]
+ _xtr $i1,$s2,0
+ lbu $t10,1024($i2) # Td4[s2>>24]
+ _xtr $i2,$s3,0
+ lbu $t11,1024($i3) # Td4[s3>>24]
+ _xtr $i3,$s0,0
+
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+# else
+ lbu $t4,1024($i0) # Td4[s2>>8]
_xtr $i0,$s0,24
+ lbu $t5,1024($i1) # Td4[s3>>8]
_xtr $i1,$s1,24
+ lbu $t6,1024($i2) # Td4[s0>>8]
_xtr $i2,$s2,24
+ lbu $t7,1024($i3) # Td4[s1>>8]
_xtr $i3,$s3,24
+
$PTR_ADD $i0,$Tbl
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
lbu $t8,1024($i0) # Td4[s0>>24]
+ $PTR_INS $i0,$s1,0,8
lbu $t9,1024($i1) # Td4[s1>>24]
+ $PTR_INS $i1,$s2,0,8
lbu $t10,1024($i2) # Td4[s2>>24]
+ $PTR_INS $i2,$s3,0,8
lbu $t11,1024($i3) # Td4[s3>>24]
+ $PTR_INS $i3,$s0,0,8
+# endif
+ _ins $t0,16
+ _ins $t1,16
+ _ins $t2,16
+ _ins $t3,16
+
+ _ins2 $t0,$t4,8
+ lbu $t4,1024($i0) # Td4[s1]
+ _ins2 $t1,$t5,8
+ lbu $t5,1024($i1) # Td4[s2]
+ _ins2 $t2,$t6,8
+ lbu $t6,1024($i2) # Td4[s3]
+ _ins2 $t3,$t7,8
+ lbu $t7,1024($i3) # Td4[s0]
+
+ _ins2 $t0,$t8,24
+ lw $s0,0($key0)
+ _ins2 $t1,$t9,24
+ lw $s1,4($key0)
+ _ins2 $t2,$t10,24
+ lw $s2,8($key0)
+ _ins2 $t3,$t11,24
+ lw $s3,12($key0)
+ _ins2 $t0,$t4,0
+ _ins2 $t1,$t5,0
+ _ins2 $t2,$t6,0
+ _ins2 $t3,$t7,0
+#else
+ lbu $t4,1024($i0) # Td4[s2>>8]
+ _xtr $i0,$s0,24
+ lbu $t5,1024($i1) # Td4[s3>>8]
+ _xtr $i1,$s1,24
+ lbu $t6,1024($i2) # Td4[s0>>8]
+ _xtr $i2,$s2,24
+ lbu $t7,1024($i3) # Td4[s1>>8]
+ _xtr $i3,$s3,24
+
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+ lbu $t8,1024($i0) # Td4[s0>>24]
_xtr $i0,$s1,0
+ lbu $t9,1024($i1) # Td4[s1>>24]
_xtr $i1,$s2,0
+ lbu $t10,1024($i2) # Td4[s2>>24]
_xtr $i2,$s3,0
+ lbu $t11,1024($i3) # Td4[s3>>24]
_xtr $i3,$s0,0
+ $PTR_ADD $i0,$Tbl
+ $PTR_ADD $i1,$Tbl
+ $PTR_ADD $i2,$Tbl
+ $PTR_ADD $i3,$Tbl
+
_ins $t0,16
_ins $t1,16
_ins $t2,16
@@ -641,44 +1139,38 @@ _mips_AES_de