Diffstat (limited to 'crypto/sha/asm/sha1-c64x-large.pl')
-rw-r--r--  crypto/sha/asm/sha1-c64x-large.pl  |  230
1 file changed, 230 insertions(+), 0 deletions(-)
diff --git a/crypto/sha/asm/sha1-c64x-large.pl b/crypto/sha/asm/sha1-c64x-large.pl
new file mode 100644
index 0000000000..3916ff3a3f
--- /dev/null
+++ b/crypto/sha/asm/sha1-c64x-large.pl
@@ -0,0 +1,230 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# SHA1 for C64x.
+#
+# November 2016
+#
+# This is a fully-unrolled SHA1 implementation. It's 25% faster than
+# the one with compact loops, doesn't use an in-memory ring buffer, as
+# everything is accommodated in registers, and has "perfect" interrupt
+# agility. The obvious drawback is code size...
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output" or die "can't open $output: $!";
+
+($CTX,$INP,$NUM) = ("A4","B4","A6"); # arguments
+
+($A,$B,$C,$D,$E, $Arot,$F,$F0,$K) = map("A$_",(16..20, 21..24));
+@V = ($A,$B,$C,$D,$E);
+@X = map("B$_",(16..31));
+($Actx,$Bctx,$Cctx,$Dctx,$Ectx) = map("A$_",(3,6..9)); # zaps $NUM
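+
+# Editorial note, not from the original: the working variables A-E and
+# the F/K temporaries live in A-side registers, the 16-word message
+# schedule @X fills B16-B31 on the B side, and the saved context is
+# parked in A3,A6-A9 (reusing $NUM's register once the block count has
+# moved to A0). With the whole schedule in registers, the routine never
+# touches the stack.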
+
+sub BODY_00_19 {
+my ($i,$a,$b,$c,$d,$e) = @_;
+my $j = ($i+1)&15;
+
+$code.=<<___ if ($i<14);
+ ROTL $a,5,$Arot ;; $i
+|| AND $c,$b,$F
+|| ANDN $d,$b,$F0
+|| ADD $K,$e,$e ; E+=K
+|| LDNW *${INP}++,@X[$i+2]
+ OR $F0,$F,$F ; F_00_19(B,C,D)
+|| ROTL $b,30,$b
+|| SWAP2 @X[$i+1],@X[$i+1]
+|| ADD @X[$i],$e,$e ; E+=X[i]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+|| SWAP4 @X[$i+1],@X[$i+1]
+ ADD $F,$e,$e ; E+=F_00_19(B,C,D)
+___
+$code.=<<___ if ($i==14);
+ ROTL $a,5,$Arot ;; $i
+|| AND $c,$b,$F
+|| ANDN $d,$b,$F0
+|| ADD $K,$e,$e ; E+=K
+ OR $F0,$F,$F ; F_00_19(B,C,D)
+|| ROTL $b,30,$b
+|| ADD @X[$i],$e,$e ; E+=X[i]
+|| SWAP2 @X[$i+1],@X[$i+1]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+|| SWAP4 @X[$i+1],@X[$i+1]
+ ADD $F,$e,$e ; E+=F_00_19(B,C,D)
+___
+$code.=<<___ if ($i==15);
+|| XOR @X[($j+2)&15],@X[$j],@X[$j]
+ ROTL $a,5,$Arot ;; $i
+|| AND $c,$b,$F
+|| ANDN $d,$b,$F0
+|| ADD $K,$e,$e ; E+=K
+|| XOR @X[($j+8)&15],@X[$j],@X[$j]
+ OR $F0,$F,$F ; F_00_19(B,C,D)
+|| ROTL $b,30,$b
+|| ADD @X[$i],$e,$e ; E+=X[i]
+|| XOR @X[($j+13)&15],@X[$j],@X[$j]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+|| ROTL @X[$j],1,@X[$j]
+ ADD $F,$e,$e ; E+=F_00_19(B,C,D)
+___
+$code.=<<___ if ($i>15);
+|| XOR @X[($j+2)&15],@X[$j],@X[$j]
+ ROTL $a,5,$Arot ;; $i
+|| AND $c,$b,$F
+|| ANDN $d,$b,$F0
+|| ADD $K,$e,$e ; E+=K
+|| XOR @X[($j+8)&15],@X[$j],@X[$j]
+ OR $F0,$F,$F ; F_00_19(B,C,D)
+|| ROTL $b,30,$b
+|| ADD @X[$i&15],$e,$e ; E+=X[i]
+|| XOR @X[($j+13)&15],@X[$j],@X[$j]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+|| ROTL @X[$j],1,@X[$j]
+ ADD $F,$e,$e ; E+=F_00_19(B,C,D)
+___
+}
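+
+# Editorial aid, not part of the original module and never called: a
+# plain-Perl reference for one BODY_00_19 round. The AND/ANDN/OR sequence
+# above computes the SHA-1 choice function F_00_19(B,C,D) = (B&C)|(~B&D),
+# and the XOR/XOR/XOR/ROTL-1 chain threaded through the rounds is the
+# message schedule W[t] = ROTL1(W[t-3]^W[t-8]^W[t-14]^W[t-16]) held in a
+# 16-entry circular register file: with j = t%16, offsets j+13, j+8 and
+# j+2 are t-3, t-8 and t-14 modulo 16.
+sub rotl32 { my ($v,$n) = @_; (($v << $n) | ($v >> (32-$n))) & 0xffffffff; }
+sub ref_round_00_19 {
+    my ($a,$b,$c,$d,$e,$x,$K) = @_;
+    my $f = ($b & $c) | (~$b & $d);	# F_00_19, "choice"
+    $e = ($e + $K + $x + rotl32($a,5) + $f) & 0xffffffff;
+    $b = rotl32($b,30);
+    ($a,$b,$c,$d,$e);	# caller rotates the tuple, as @V is rotated below
+}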
+
+sub BODY_20_39 {
+my ($i,$a,$b,$c,$d,$e) = @_;
+my $j = ($i+1)&15;
+
+$code.=<<___ if ($i<79);
+|| XOR @X[($j+2)&15],@X[$j],@X[$j]
+ ROTL $a,5,$Arot ;; $i
+|| XOR $c,$b,$F
+|| ADD $K,$e,$e ; E+=K
+|| XOR @X[($j+8)&15],@X[$j],@X[$j]
+ XOR $d,$F,$F ; F_20_39(B,C,D)
+|| ROTL $b,30,$b
+|| ADD @X[$i&15],$e,$e ; E+=X[i]
+|| XOR @X[($j+13)&15],@X[$j],@X[$j]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+|| ROTL @X[$j],1,@X[$j]
+ ADD $F,$e,$e ; E+=F_20_39(B,C,D)
+___
+$code.=<<___ if ($i==79);
+|| [A0] B loop?
+|| [A0] LDNW *${INP}++,@X[0] ; pre-fetch input
+ ROTL $a,5,$Arot ;; $i
+|| XOR $c,$b,$F
+|| ADD $K,$e,$e ; E+=K
+|| [A0] LDNW *${INP}++,@X[1]
+ XOR $d,$F,$F ; F_20_39(B,C,D)
+|| ROTL $b,30,$b
+|| ADD @X[$i&15],$e,$e ; E+=X[i]
+ ADD $Arot,$e,$e ; E+=rot(A,5)
+ ADD $F,$e,$e ; E+=F_20_39(B,C,D)
+|| ADD $Bctx,$a,$a ; accumulate context
+|| ADD $Cctx,$b,$b
+ ADD $Dctx,$c,$c
+|| ADD $Ectx,$d,$d
+|| ADD $Actx,$e,$e
+;;===== branch to loop? is taken here
+___
+}
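+
+# Editorial reference for BODY_20_39, likewise never called: rounds 20-39
+# and 60-79 use the parity function F_20_39(B,C,D) = B^C^D, built from
+# the two XORs above; everything else matches ref_round_00_19.
+sub ref_round_20_39 {
+    my ($a,$b,$c,$d,$e,$x,$K) = @_;
+    my $f = $b ^ $c ^ $d;	# F_20_39, "parity"
+    $e = ($e + $K + $x + rotl32($a,5) + $f) & 0xffffffff;
+    $b = rotl32($b,30);
+    ($a,$b,$c,$d,$e);
+}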
+
+sub BODY_40_59 {
+my ($i,$a,$b,$c,$d,$e) = @_;
+my $j = ($i+1)&15;
+
+$code.=<<___;
+|| XOR @X[($j+2)&15],@X[$j],@X[$j]
+ ROTL $a,5,$Arot ;; $i
+|| AND $c,$b,$F
+|| AND $d,$b,$F0
+|| ADD $K,$e,$e ; E+=K
+|| XOR @X[($j+8)&15],@X[$j],@X[$j]
+ XOR $F0,$F,$F
+|| AND $c,$d,$F0
+|| ROTL $b,30,$b
+|| XOR @X[($j+13)&15],@X[$j],@X[$j]
+|| ADD @X[$i&15],$e,$e ; E+=X[i]
+ XOR $F0,$F,$F ; F_40_59(B,C,D)
+|| ADD $Arot,$e,$e ; E+=rot(A,5)
+|| ROTL @X[$j],1,@X[$j]
+	ADD	$F,$e,$e		; E+=F_40_59(B,C,D)
+___
+}
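+
+# Editorial reference for BODY_40_59, never called: the majority function
+# F_40_59(B,C,D) = (B&C)^(B&D)^(C&D), assembled above from the two AND
+# pairs and two XORs.
+sub ref_round_40_59 {
+    my ($a,$b,$c,$d,$e,$x,$K) = @_;
+    my $f = ($b & $c) ^ ($b & $d) ^ ($c & $d);	# F_40_59, "majority"
+    $e = ($e + $K + $x + rotl32($a,5) + $f) & 0xffffffff;
+    $b = rotl32($b,30);
+    ($a,$b,$c,$d,$e);
+}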
+
+$code=<<___;
+ .text
+
+ .if .ASSEMBLER_VERSION<7000000
+ .asg 0,__TI_EABI__
+ .endif
+ .if __TI_EABI__
+ .asg sha1_block_data_order,_sha1_block_data_order
+ .endif
+
+ .asg B3,RA
+ .asg A15,FP
+ .asg B15,SP
+
+ .if .BIG_ENDIAN
+ .asg MV,SWAP2
+ .asg MV,SWAP4
+ .endif
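+
+; Editorial note: SWAP2 followed by SWAP4 byte-reverses a little-endian
+; input word into SHA-1's big-endian order; on big-endian targets both
+; were aliased to plain MV above, so they cost nothing extra.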
+
+ .global _sha1_block_data_order
+_sha1_block_data_order:
+ .asmfunc
+ MV $NUM,A0 ; reassign $NUM
+ [!A0] BNOP RA ; if ($NUM==0) return;
+|| [A0] LDW *${CTX}[0],$A ; load A-E...
+ [A0] LDW *${CTX}[1],$B
+ [A0] LDW *${CTX}[2],$C
+ [A0] LDW *${CTX}[3],$D
+ [A0] LDW *${CTX}[4],$E
+ [A0] LDNW *${INP}++,@X[0] ; pre-fetch input
+ [A0] LDNW *${INP}++,@X[1]
+ NOP 3
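+;; Editorial note: loads on C6x have four delay slots; the NOP 3 fills
+;; the remaining latency so A-E and the prefetched input words are ready
+;; when loop? starts reading them.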
+
+loop?:
+ SUB A0,1,A0
+|| MV $A,$Actx
+|| MVD $B,$Bctx
+|| SWAP2 @X[0],@X[0]
+|| MVKL 0x5a827999,$K
+ MVKH 0x5a827999,$K ; K_00_19
+|| MV $C,$Cctx
+|| MV $D,$Dctx
+|| MVD $E,$Ectx
+|| SWAP4 @X[0],@X[0]
+___
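+
+# Editorial note: each MVKL/MVKH pair above and below materializes one of
+# the four 32-bit SHA-1 round constants in $K. unshift(@V,pop(@V)) rotates
+# the *names* of the working variables after every round, so the emitted
+# code never shuffles A-E between registers; the 80 round bodies are
+# simply generated with the register roles permuted.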
+for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+|| MVKL 0x6ed9eba1,$K
+ MVKH 0x6ed9eba1,$K ; K_20_39
+___
+for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+|| MVKL 0x8f1bbcdc,$K
+ MVKH 0x8f1bbcdc,$K ; K_40_59
+___
+for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+|| MVKL 0xca62c1d6,$K
+ MVKH 0xca62c1d6,$K ; K_60_79
+___
+for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
+$code.=<<___;
+ BNOP RA ; return
+ STW $A,*${CTX}[0] ; emit A-E...
+ STW $B,*${CTX}[1]
+ STW $C,*${CTX}[2]
+ STW $D,*${CTX}[3]
+ STW $E,*${CTX}[4]
+ .endasmfunc
+
+ .sect .const
+ .cstring "SHA1 block transform for C64x, CRYPTOGAMS by <appro\@openssl.org>"
+ .align 4
+___
+
+print $code;
+close STDOUT or die "error closing STDOUT: $!";