diff options
author | Andy Polyakov <appro@openssl.org> | 2016-09-01 10:39:15 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2016-09-02 13:33:17 +0200 |
commit | 947716c1872d210828122212d076d503ae68b928 (patch) | |
tree | cf5de69328e737756a20349f2930f1cd8af5edd1 /crypto | |
parent | a43249122baabb9ebaee0822d683f1fdb60f72a7 (diff) |
MIPS assembly pack: adapt it for MIPS[32|64]R6.
MIPS[32|64]R6 is binary and source incompatible with previous MIPS ISA
specifications. Fortunately it's still possible to resolve differences
in source code with standard pre-processor and switching to trap-free
version of addition and subtraction instructions.
Reviewed-by: Richard Levitte <levitte@openssl.org>
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/aes/asm/aes-mips.pl | 87 | ||||
-rw-r--r-- | crypto/aes/build.info | 1 | ||||
-rw-r--r-- | crypto/bn/asm/mips-mont.pl | 102 | ||||
-rw-r--r-- | crypto/bn/asm/mips.pl | 742 | ||||
-rw-r--r-- | crypto/bn/build.info | 16 | ||||
-rw-r--r-- | crypto/mips_arch.h | 40 | ||||
-rwxr-xr-x | crypto/poly1305/asm/poly1305-mips.pl | 50 | ||||
-rw-r--r-- | crypto/poly1305/build.info | 1 | ||||
-rw-r--r-- | crypto/sha/asm/sha1-mips.pl | 26 | ||||
-rw-r--r-- | crypto/sha/asm/sha512-mips.pl | 22 | ||||
-rw-r--r-- | crypto/sha/build.info | 3 |
11 files changed, 608 insertions, 482 deletions
diff --git a/crypto/aes/asm/aes-mips.pl b/crypto/aes/asm/aes-mips.pl index 439578d9c2..0eb1474a3a 100644 --- a/crypto/aes/asm/aes-mips.pl +++ b/crypto/aes/asm/aes-mips.pl @@ -65,8 +65,8 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { $PTR_LA="dla"; - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $PTR_INS="dins"; $REG_S="sd"; $REG_L="ld"; @@ -74,8 +74,8 @@ if ($flavour =~ /64|n32/i) { $SZREG=8; } else { $PTR_LA="la"; - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $PTR_INS="ins"; $REG_S="sw"; $REG_L="lw"; @@ -102,15 +102,13 @@ open STDOUT,">$output"; my ($MSB,$LSB)=(0,3); # automatically converted to little-endian $code.=<<___; +#include "mips_arch.h" + .text #ifdef OPENSSL_FIPSCANISTER # include <openssl/fipssyms.h> #endif -#if defined(__mips_smartmips) && !defined(_MIPS_ARCH_MIPS32R2) -#define _MIPS_ARCH_MIPS32R2 -#endif - #if !defined(__mips_eabi) && (!defined(__vxworks) || defined(__pic__)) .option pic2 #endif @@ -146,7 +144,7 @@ _mips_AES_encrypt: xor $s2,$t2 xor $s3,$t3 - sub $cnt,1 + subu $cnt,1 #if defined(__mips_smartmips) ext $i0,$s1,16,8 .Loop_enc: @@ -218,7 +216,7 @@ _mips_AES_encrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -409,7 +407,7 @@ _mips_AES_encrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -657,6 +655,12 @@ $code.=<<___; .set reorder $PTR_LA $Tbl,AES_Te # PIC-ified 'load address' +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $s0,0($inp) + lw $s1,4($inp) + lw $s2,8($inp) + lw $s3,12($inp) +#else lwl $s0,0+$MSB($inp) lwl $s1,4+$MSB($inp) lwl $s2,8+$MSB($inp) @@ -665,9 +669,16 @@ $code.=<<___; lwr $s1,4+$LSB($inp) lwr $s2,8+$LSB($inp) lwr $s3,12+$LSB($inp) +#endif bal _mips_AES_encrypt +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + sw $s0,0($out) + sw $s1,4($out) + sw $s2,8($out) + sw $s3,12($out) +#else swr $s0,0+$LSB($out) swr $s1,4+$LSB($out) swr $s2,8+$LSB($out) @@ -676,6 +687,7 @@ $code.=<<___; swl $s1,4+$MSB($out) swl $s2,8+$MSB($out) swl $s3,12+$MSB($out) +#endif .set noreorder $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) @@ -720,7 +732,7 @@ _mips_AES_decrypt: xor $s2,$t2 xor $s3,$t3 - sub $cnt,1 + subu $cnt,1 #if defined(__mips_smartmips) ext $i0,$s3,16,8 .Loop_dec: @@ -792,7 +804,7 @@ _mips_AES_decrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -985,7 +997,7 @@ _mips_AES_decrypt: xor $t2,$t6 xor $t3,$t7 - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key0,16 xor $s0,$t0 xor $s1,$t1 @@ -1228,6 +1240,12 @@ $code.=<<___; .set reorder $PTR_LA $Tbl,AES_Td # PIC-ified 'load address' +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $s0,0($inp) + lw $s1,4($inp) + lw $s2,8($inp) + lw $s3,12($inp) +#else lwl $s0,0+$MSB($inp) lwl $s1,4+$MSB($inp) lwl $s2,8+$MSB($inp) @@ -1236,9 +1254,16 @@ $code.=<<___; lwr $s1,4+$LSB($inp) lwr $s2,8+$LSB($inp) lwr $s3,12+$LSB($inp) +#endif bal _mips_AES_decrypt +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + sw $s0,0($out) + sw $s1,4($out) + sw $s2,8($out) + sw $s3,12($out) +#else swr $s0,0+$LSB($out) swr $s1,4+$LSB($out) swr $s2,8+$LSB($out) @@ -1247,6 +1272,7 @@ $code.=<<___; swl $s1,4+$MSB($out) swl $s2,8+$MSB($out) swl $s3,12+$MSB($out) +#endif .set noreorder $REG_L $ra,$FRAMESIZE-1*$SZREG($sp) @@ -1295,35 +1321,52 @@ _mips_AES_set_encrypt_key: $PTR_ADD $rcon,$Tbl,256 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk0,0($inp) # load 128 bits + lw $rk1,4($inp) + lw $rk2,8($inp) + lw $rk3,12($inp) +#else lwl $rk0,0+$MSB($inp) # load 128 bits lwl $rk1,4+$MSB($inp) lwl $rk2,8+$MSB($inp) lwl $rk3,12+$MSB($inp) - li $at,128 lwr $rk0,0+$LSB($inp) lwr $rk1,4+$LSB($inp) lwr $rk2,8+$LSB($inp) lwr $rk3,12+$LSB($inp) +#endif + li $at,128 .set noreorder beq $bits,$at,.L128bits li $cnt,10 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk4,16($inp) # load 192 bits + lw $rk5,20($inp) +#else lwl $rk4,16+$MSB($inp) # load 192 bits lwl $rk5,20+$MSB($inp) - li $at,192 lwr $rk4,16+$LSB($inp) lwr $rk5,20+$LSB($inp) +#endif + li $at,192 .set noreorder beq $bits,$at,.L192bits li $cnt,8 .set reorder +#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6) + lw $rk6,24($inp) # load 256 bits + lw $rk7,28($inp) +#else lwl $rk6,24+$MSB($inp) # load 256 bits lwl $rk7,28+$MSB($inp) - li $at,256 lwr $rk6,24+$LSB($inp) lwr $rk7,28+$LSB($inp) +#endif + li $at,256 .set noreorder beq $bits,$at,.L256bits li $cnt,7 @@ -1353,7 +1396,7 @@ _mips_AES_set_encrypt_key: sw $rk1,4($key) sw $rk2,8($key) sw $rk3,12($key) - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key,16 _bias $i0,24 @@ -1410,7 +1453,7 @@ _mips_AES_set_encrypt_key: sw $rk3,12($key) sw $rk4,16($key) sw $rk5,20($key) - sub $cnt,1 + subu $cnt,1 $PTR_ADD $key,24 _bias $i0,24 @@ -1471,7 +1514,7 @@ _mips_AES_set_encrypt_key: sw $rk5,20($key) sw $rk6,24($key) sw $rk7,28($key) - sub $cnt,1 + subu $cnt,1 _bias $i0,24 _bias $i1,16 @@ -1653,7 +1696,7 @@ $code.=<<___; lw $tp1,16($key) # modulo-scheduled lui $x80808080,0x8080 - sub $cnt,1 + subu $cnt,1 or $x80808080,0x8080 sll $cnt,2 $PTR_ADD $key,16 @@ -1716,7 +1759,7 @@ $code.=<<___; lw $tp1,4($key) # modulo-scheduled xor $tpe,$tp2 #endif - sub $cnt,1 + subu $cnt,1 sw $tpe,0($key) $PTR_ADD $key,4 bnez $cnt,.Lmix diff --git a/crypto/aes/build.info b/crypto/aes/build.info index cf6cb5ec25..bcc71abaeb 100644 --- a/crypto/aes/build.info +++ b/crypto/aes/build.info @@ -35,6 +35,7 @@ GENERATE[aesp8-ppc.s]=asm/aesp8-ppc.pl $(PERLASM_SCHEME) GENERATE[aes-parisc.s]=asm/aes-parisc.pl $(PERLASM_SCHEME) GENERATE[aes-mips.S]=asm/aes-mips.pl $(PERLASM_SCHEME) +INCLUDE[aes-mips.o]=.. GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl $(PERLASM_SCHEME) INCLUDE[aesv8-armx.o]=.. diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl index a907571bec..56d4202824 100644 --- a/crypto/bn/asm/mips-mont.pl +++ b/crypto/bn/asm/mips-mont.pl @@ -56,14 +56,14 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $REG_S="sd"; $REG_L="ld"; $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $REG_S="sw"; $REG_L="lw"; $SZREG=4; @@ -121,6 +121,8 @@ $m1=$s11; $FRAMESIZE=14; $code=<<___; +#include "mips_arch.h" + .text .set noat @@ -183,27 +185,27 @@ $code.=<<___; $PTR_SUB $sp,$num and $sp,$at - $MULTU $aj,$bi - $LD $alo,$BNSZ($ap) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $MULTU $lo0,$n0 - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - $MULTU $nlo,$m1 + $MULTU ($aj,$bi) + $LD $ahi,$BNSZ($ap) + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) + $MULTU ($lo0,$n0) + mflo ($m1,$lo0,$n0) + + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) + + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -215,25 +217,25 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 - $MULTU $nj,$m1 + $MULTU ($nj,$m1) $ADDU $hi1,$at addu $j,$BNSZ $ST $lo1,($tp) sltu $t0,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) bnez $t0,.L1st $PTR_ADD $tp,$BNSZ @@ -263,34 +265,34 @@ $code.=<<___; $PTR_ADD $bi,$bp,$i $LD $bi,($bi) $LD $aj,($ap) - $LD $alo,$BNSZ($ap) + $LD $ahi,$BNSZ($ap) $LD $tj,($sp) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $LD $nj,($np) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) $ADDU $lo0,$tj - $MULTU $lo0,$n0 + $MULTU ($lo0,$n0) sltu $at,$lo0,$tj $ADDU $hi0,$at - mflo $m1 + mflo ($m1,$lo0,$n0) - $MULTU $alo,$bi - mflo $alo - mfhi $ahi + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) - $MULTU $nlo,$m1 + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -303,19 +305,19 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo0,$tj addu $j,$BNSZ - $MULTU $nj,$m1 + $MULTU ($nj,$m1) sltu $at,$lo0,$tj $ADDU $lo1,$lo0 $ADDU $hi0,$at @@ -323,8 +325,8 @@ $code.=<<___; $LD $tj,2*$BNSZ($tp) $ADDU $hi1,$t0 sltu $at,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) $ST $lo1,($tp) bnez $at,.Linner $PTR_ADD $tp,$BNSZ diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index 420f01f3a4..102b656229 100644 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i); $minus4=$v1; $code.=<<___; +#include "mips_arch.h" + +#if defined(_MIPS_ARCH_MIPS64R6) +# define ddivu(rs,rt) +# define mfqt(rd,rs,rt) ddivu rd,rs,rt +# define mfrm(rd,rs,rt) dmodu rd,rs,rt +#elif defined(_MIPS_ARCH_MIPS32R6) +# define divu(rs,rt) +# define mfqt(rd,rs,rt) divu rd,rs,rt +# define mfrm(rd,rs,rt) modu rd,rs,rt +#else +# define $DIVU(rs,rt) $DIVU $zero,rs,rt +# define mfqt(rd,rs,rt) mflo rd +# define mfrm(rd,rs,rt) mfhi rd +#endif + .rdata .asciiz "mips3.s, Version 1.2" .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro\@fy.chalmers.se>" @@ -151,7 +167,7 @@ $code.=<<___; .L_bn_mul_add_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) $LD $t2,$BNSZ($a1) $LD $t3,$BNSZ($a0) @@ -161,11 +177,11 @@ $code.=<<___; sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit # values", but it seems to work fine # even on 64-bit registers. - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 - $MULTU $t2,$a3 + $MULTU ($t2,$a3) sltu $at,$t1,$at $ST $t1,0($a0) $ADDU $v0,$at @@ -174,11 +190,11 @@ $code.=<<___; $LD $ta3,3*$BNSZ($a0) $ADDU $t3,$v0 sltu $v0,$t3,$v0 - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $t3,$at $ADDU $v0,$t2 - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) sltu $at,$t3,$at $ST $t3,$BNSZ($a0) $ADDU $v0,$at @@ -188,11 +204,11 @@ $code.=<<___; $PTR_ADD $a1,4*$BNSZ $ADDU $ta1,$v0 sltu $v0,$ta1,$v0 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $ta1,$at $ADDU $v0,$ta0 - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) sltu $at,$ta1,$at $ST $ta1,-2*$BNSZ($a0) $ADDU $v0,$at @@ -201,8 +217,8 @@ $code.=<<___; and $ta0,$a2,$minus4 $ADDU $ta3,$v0 sltu $v0,$ta3,$v0 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $ta3,$at $ADDU $v0,$ta2 sltu $at,$ta3,$at @@ -217,13 +233,13 @@ $code.=<<___; .L_bn_mul_add_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -232,13 +248,13 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,$BNSZ($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -247,12 +263,12 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,2*$BNSZ($a0) $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -310,40 +326,40 @@ $code.=<<___; .L_bn_mul_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at - $MULTU $t2,$a3 + $MULTU ($t2,$a3) $ST $v0,0($a0) $ADDU $v0,$t1,$t0 subu $a2,4 $PTR_ADD $a0,4*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $v0,$at sltu $t3,$v0,$at - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) $ST $v0,-3*$BNSZ($a0) $ADDU $v0,$t3,$t2 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $v0,$at sltu $ta1,$v0,$at - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) $ST $v0,-2*$BNSZ($a0) $ADDU $v0,$ta1,$ta0 and $ta0,$a2,$minus4 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) @@ -357,10 +373,10 @@ $code.=<<___; .L_bn_mul_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,0($a0) @@ -368,10 +384,10 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,$BNSZ($a0) @@ -379,9 +395,9 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - mflo $at - mfhi $t0 + $MULTU ($t0,$a3) + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,2*$BNSZ($a0) @@ -438,35 +454,35 @@ $code.=<<___; .L_bn_sqr_words_loop: $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) - $MULTU $t2,$t2 + $MULTU ($t2,$t2) subu $a2,4 $PTR_ADD $a0,8*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $t3 - mfhi $t2 + mflo ($t3,$t2,$t2) + mfhi ($t2,$t2,$t2) $ST $t3,-6*$BNSZ($a0) $ST $t2,-5*$BNSZ($a0) - $MULTU $ta0,$ta0 - mflo $ta1 - mfhi $ta0 + $MULTU ($ta0,$ta0) + mflo ($ta1,$ta0,$ta0) + mfhi ($ta0,$ta0,$ta0) $ST $ta1,-4*$BNSZ($a0) $ST $ta0,-3*$BNSZ($a0) - $MULTU $ta2,$ta2 + $MULTU ($ta2,$ta2) and $ta0,$a2,$minus4 - mflo $ta3 - mfhi $ta2 + mflo ($ta3,$ta2,$ta2) + mfhi ($ta2,$ta2,$ta2) $ST $ta3,-2*$BNSZ($a0) .set noreorder @@ -479,27 +495,27 @@ $code.=<<___; .L_bn_sqr_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,2*$BNSZ($a0) $ST $t0,3*$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$t0 - mflo $t1 - mfhi $t0 + $MULTU ($t0,$t0) + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,4*$BNSZ($a0) $ST $t0,5*$BNSZ($a0) @@ -823,11 +839,11 @@ $code.=<<___; move $ta3,$ra bal bn_div_words_internal move $ra,$ta3 - $MULTU $ta2,$v0 + $MULTU ($ta2,$v0) $LD $t2,-2*$BNSZ($a3) move $ta0,$zero - mfhi $t1 - mflo $t0 + mfhi ($t1,$ta2,$v0) + mflo ($t0,$ta2,$v0) sltu $t8,$t1,$a1 .L_bn_div_3_words_inner_loop: bnez $t8,.L_bn_div_3_words_inner_loop_done @@ -930,15 +946,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div1 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div1: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop1: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -963,15 +979,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div2 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div2: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop2: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -1070,592 +1086,592 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) $LD $b_4,4*$BNSZ($a2) $LD $b_5,5*$BNSZ($a2) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at $LD $b_6,6*$BNSZ($a2) $LD $b_7,7*$BNSZ($a2) $ST $c_1,0($a0) # r[0]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) # r[1]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) # r[2]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); + $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) # r[3]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_0) + mfhi ($t_2,$a_4,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); + $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_4) + mfhi ($t_2,$a_0,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) # r[4]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_5) + mfhi ($t_2,$a_0,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); + $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_4) + mfhi ($t_2,$a_1,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); + $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_1) + mfhi ($t_2,$a_4,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); + $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_0) + mfhi ($t_2,$a_5,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); + $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) # r[5]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_0) + mfhi ($t_2,$a_6,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); + $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_1) + mfhi ($t_2,$a_5,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); + $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_2) + mfhi ($t_2,$a_4,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); + $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_4) + mfhi ($t_2,$a_2,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); + $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_5) + mfhi ($t_2,$a_1,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); + $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_6) + mfhi ($t_2,$a_0,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MU |