summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2012-09-18 12:52:23 +0000
committerAndy Polyakov <appro@openssl.org>2012-09-18 12:52:23 +0000
commit5cc2159526bb088451d4ba1a266235e2baa025d0 (patch)
treed17c84e802ee90548aba39e158767c91514f20c5
parent6373af15d37bd5aeb75d207787f154914f0ff9d8 (diff)
MIPS assembly pack: add support for SmartMIPS ASE.
-rw-r--r--crypto/aes/asm/aes-mips.pl216
-rw-r--r--crypto/sha/asm/sha1-mips.pl6
-rw-r--r--crypto/sha/asm/sha512-mips.pl8
3 files changed, 208 insertions, 22 deletions
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl
index a00193fa45..1d8afe9682 100644
--- a/crypto/aes/asm/aes-mips.pl
+++ b/crypto/aes/asm/aes-mips.pl
@@ -22,7 +22,10 @@
# September 2012
#
-# Add MIPS32R2 code.
+# Add MIPS32R2 (~10% less instructions) and SmartMIPS ASE (further
+# ~25% less instructions) code. Note that there is no run-time switch,
+# instead, code path is chosen upon pre-process time, pass -mips32r2
+# or/and -msmartmips.
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -95,7 +98,11 @@ $code.=<<___;
# include <openssl/fipssyms.h>
#endif
-#if !defined(__vxworks) || defined(__pic__)
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif
.set noat
@@ -131,6 +138,89 @@ _mips_AES_encrypt:
xor $s3,$t3
sub $cnt,1
+#if defined(__mips_smartmips)
+ ext $i0,$s1,16,8
+.Loop_enc:
+ ext $i1,$s2,16,8
+ ext $i2,$s3,16,8
+ ext $i3,$s0,16,8
+ lwxs $t0,$i0($Tbl) # Te1[s1>>16]
+ ext $i0,$s2,8,8
+ lwxs $t1,$i1($Tbl) # Te1[s2>>16]
+ ext $i1,$s3,8,8
+ lwxs $t2,$i2($Tbl) # Te1[s3>>16]
+ ext $i2,$s0,8,8
+ lwxs $t3,$i3($Tbl) # Te1[s0>>16]
+ ext $i3,$s1,8,8
+
+ lwxs $t4,$i0($Tbl) # Te2[s2>>8]
+ ext $i0,$s3,0,8
+ lwxs $t5,$i1($Tbl) # Te2[s3>>8]
+ ext $i1,$s0,0,8
+ lwxs $t6,$i2($Tbl) # Te2[s0>>8]
+ ext $i2,$s1,0,8
+ lwxs $t7,$i3($Tbl) # Te2[s1>>8]
+ ext $i3,$s2,0,8
+
+ lwxs $t8,$i0($Tbl) # Te3[s3]
+ ext $i0,$s0,24,8
+ lwxs $t9,$i1($Tbl) # Te3[s0]
+ ext $i1,$s1,24,8
+ lwxs $t10,$i2($Tbl) # Te3[s1]
+ ext $i2,$s2,24,8
+ lwxs $t11,$i3($Tbl) # Te3[s2]
+ ext $i3,$s3,24,8
+
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ xor $t0,$t4
+ lwxs $t4,$i0($Tbl) # Te0[s0>>24]
+ xor $t1,$t5
+ lwxs $t5,$i1($Tbl) # Te0[s1>>24]
+ xor $t2,$t6
+ lwxs $t6,$i2($Tbl) # Te0[s2>>24]
+ xor $t3,$t7
+ lwxs $t7,$i3($Tbl) # Te0[s3>>24]
+
+ rotr $t8,$t8,24
+ lw $s0,0($key0)
+ rotr $t9,$t9,24
+ lw $s1,4($key0)
+ rotr $t10,$t10,24
+ lw $s2,8($key0)
+ rotr $t11,$t11,24
+ lw $s3,12($key0)
+
+ xor $t0,$t8
+ xor $t1,$t9
+ xor $t2,$t10
+ xor $t3,$t11
+
+ xor $t0,$t4
+ xor $t1,$t5
+ xor $t2,$t6
+ xor $t3,$t7
+
+ sub $cnt,1
+ $PTR_ADD $key0,16
+ xor $s0,$t0
+ xor $s1,$t1
+ xor $s2,$t2
+ xor $s3,$t3
+ .set noreorder
+ bnez $cnt,.Loop_enc
+ ext $i0,$s1,16,8
+
+ _xtr $i0,$s1,16-2
+#else
_xtr $i0,$s1,16-2
.Loop_enc:
_xtr $i1,$s2,16-2
@@ -181,13 +271,13 @@ _mips_AES_encrypt:
rotr $t2,$t2,8
rotr $t3,$t3,8
# if defined(_MIPSEL)
- lw $t4,0($i0) # Te1[s1>>16]
+ lw $t4,0($i0) # Te2[s2>>8]
_xtr $i0,$s3,0-2
- lw $t5,0($i1) # Te1[s2>>16]
+ lw $t5,0($i1) # Te2[s3>>8]
_xtr $i1,$s0,0-2
- lw $t6,0($i2) # Te1[s3>>16]
+ lw $t6,0($i2) # Te2[s0>>8]
_xtr $i2,$s1,0-2
- lw $t7,0($i3) # Te1[s0>>16]
+ lw $t7,0($i3) # Te2[s1>>8]
_xtr $i3,$s2,0-2
and $i0,0x3fc
@@ -198,31 +288,31 @@ _mips_AES_encrypt:
$PTR_ADD $i1,$Tbl
$PTR_ADD $i2,$Tbl
$PTR_ADD $i3,$Tbl
- lw $t8,0($i0) # Te1[s1>>16]
+ lw $t8,0($i0) # Te3[s3]
$PTR_INS $i0,$s0,2,8
- lw $t9,0($i1) # Te1[s2>>16]
+ lw $t9,0($i1) # Te3[s0]
$PTR_INS $i1,$s1,2,8
- lw $t10,0($i2) # Te1[s3>>16]
+ lw $t10,0($i2) # Te3[s1]
$PTR_INS $i2,$s2,2,8
- lw $t11,0($i3) # Te1[s0>>16]
+ lw $t11,0($i3) # Te3[s2]
$PTR_INS $i3,$s3,2,8
# else
- lw $t4,0($i0) # Te1[s1>>16]
+ lw $t4,0($i0) # Te2[s2>>8]
$PTR_INS $i0,$s3,2,8
- lw $t5,0($i1) # Te1[s2>>16]
+ lw $t5,0($i1) # Te2[s3>>8]
$PTR_INS $i1,$s0,2,8
- lw $t6,0($i2) # Te1[s3>>16]
+ lw $t6,0($i2) # Te2[s0>>8]
$PTR_INS $i2,$s1,2,8
- lw $t7,0($i3) # Te1[s0>>16]
+ lw $t7,0($i3) # Te2[s1>>8]
$PTR_INS $i3,$s2,2,8
- lw $t8,0($i0) # Te1[s1>>16]
+ lw $t8,0($i0) # Te3[s3]
_xtr $i0,$s0,24-2
- lw $t9,0($i1) # Te1[s2>>16]
+ lw $t9,0($i1) # Te3[s0]
_xtr $i1,$s1,24-2
- lw $t10,0($i2) # Te1[s3>>16]
+ lw $t10,0($i2) # Te3[s1]
_xtr $i2,$s2,24-2
- lw $t11,0($i3) # Te1[s0>>16]
+ lw $t11,0($i3) # Te3[s2]
_xtr $i3,$s3,24-2
and $i0,0x3fc
@@ -319,6 +409,7 @@ _mips_AES_encrypt:
.set noreorder
bnez $cnt,.Loop_enc
_xtr $i0,$s1,16-2
+#endif
.set reorder
_xtr $i1,$s2,16-2
@@ -621,6 +712,89 @@ _mips_AES_decrypt:
xor $s3,$t3
sub $cnt,1
+#if defined(__mips_smartmips)
+ ext $i0,$s3,16,8
+.Loop_dec:
+ ext $i1,$s0,16,8
+ ext $i2,$s1,16,8
+ ext $i3,$s2,16,8
+ lwxs $t0,$i0($Tbl) # Td1[s3>>16]
+ ext $i0,$s2,8,8
+ lwxs $t1,$i1($Tbl) # Td1[s0>>16]
+ ext $i1,$s3,8,8
+ lwxs $t2,$i2($Tbl) # Td1[s1>>16]
+ ext $i2,$s0,8,8
+ lwxs $t3,$i3($Tbl) # Td1[s2>>16]
+ ext $i3,$s1,8,8
+
+ lwxs $t4,$i0($Tbl) # Td2[s2>>8]
+ ext $i0,$s1,0,8
+ lwxs $t5,$i1($Tbl) # Td2[s3>>8]
+ ext $i1,$s2,0,8
+ lwxs $t6,$i2($Tbl) # Td2[s0>>8]
+ ext $i2,$s3,0,8
+ lwxs $t7,$i3($Tbl) # Td2[s1>>8]
+ ext $i3,$s0,0,8
+
+ lwxs $t8,$i0($Tbl) # Td3[s1]
+ ext $i0,$s0,24,8
+ lwxs $t9,$i1($Tbl) # Td3[s2]
+ ext $i1,$s1,24,8
+ lwxs $t10,$i2($Tbl) # Td3[s3]
+ ext $i2,$s2,24,8
+ lwxs $t11,$i3($Tbl) # Td3[s0]
+ ext $i3,$s3,24,8
+
+ rotr $t0,$t0,8
+ rotr $t1,$t1,8
+ rotr $t2,$t2,8
+ rotr $t3,$t3,8
+
+ rotr $t4,$t4,16
+ rotr $t5,$t5,16
+ rotr $t6,$t6,16
+ rotr $t7,$t7,16
+
+ xor $t0,$t4
+ lwxs $t4,$i0($Tbl) # Td0[s0>>24]
+ xor $t1,$t5
+ lwxs $t5,$i1($Tbl) # Td0[s1>>24]
+ xor $t2,$t6
+ lwxs $t6,$i2($Tbl) # Td0[s2>>24]
+ xor $t3,$t7
+ lwxs $t7,$i3($Tbl) # Td0[s3>>24]
+
+ rotr $t8,$t8,24
+ lw $s0,0($key0)
+ rotr $t9,$t9,24
+ lw $s1,4($key0)
+ rotr $t10,$t10,24
+ lw $s2,8($key0)
+ rotr $t11,$t11,24
+ lw $s3,12($key0)
+
+ xor $t0,$t8
+ xor $t1,$t9
+ xor $t2,$t10
+ xor $t3,$t11
+
+ xor $t0,$t4
+ xor $t1,$t5
+ xor $t2,$t6
+ xor $t3,$t7
+
+ sub $cnt,1
+ $PTR_ADD $key0,16
+ xor $s0,$t0
+ xor $s1,$t1
+ xor $s2,$t2
+ xor $s3,$t3
+ .set noreorder
+ bnez $cnt,.Loop_dec
+ ext $i0,$s3,16,8
+
+ _xtr $i0,$s3,16-2
+#else
_xtr $i0,$s3,16-2
.Loop_dec:
_xtr $i1,$s0,16-2
@@ -811,6 +985,7 @@ _mips_AES_decrypt:
.set noreorder
bnez $cnt,.Loop_dec
_xtr $i0,$s3,16-2
+#endif
.set reorder
lw $t4,1024($Tbl) # prefetch Td4
@@ -1936,7 +2111,10 @@ foreach (split("\n",$code)) {
sprintf("$1%d($3)",eval("$2-$2%4+($2%4+1)&3"))/e;
}
- s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e if(!$big_endian);
+ if (!$big_endian) {
+ s/(rotr\s+\$[0-9]+,\$[0-9]+),([0-9]+)/sprintf("$1,%d",32-$2)/e;
+ s/(ext\s+\$[0-9]+,\$[0-9]+),([0-9]+),8/sprintf("$1,%d,8",24-$2)/e;
+ }
print $_,"\n";
}
diff --git a/crypto/sha/asm/sha1-mips.pl b/crypto/sha/asm/sha1-mips.pl
index 63c3efa03f..73bf0609b4 100644
--- a/crypto/sha/asm/sha1-mips.pl
+++ b/crypto/sha/asm/sha1-mips.pl
@@ -17,7 +17,7 @@
# September 2012.
#
-# Add MIPS32r2 code.
+# Add MIPS32r2 code (>25% less instructions).
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -333,6 +333,10 @@ $code=<<___;
# include <openssl/fipssyms.h>
#endif
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
.text
.set noat
diff --git a/crypto/sha/asm/sha512-mips.pl b/crypto/sha/asm/sha512-mips.pl
index d190414203..1fff09bb4d 100644
--- a/crypto/sha/asm/sha512-mips.pl
+++ b/crypto/sha/asm/sha512-mips.pl
@@ -19,7 +19,7 @@
# September 2012.
#
-# Add MIPS[32|64]r2 code.
+# Add MIPS[32|64]R2 code (>25% less instructions).
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
@@ -293,9 +293,13 @@ $code.=<<___;
# include <openssl/fipssyms.h>
#endif
+#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2)
+#define _MIPS_ARCH_MIPS32R2
+#endif
+
.text
.set noat
-#if !defined(__vxworks) || defined(__pic__)
+#if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__))
.option pic2
#endif