summaryrefslogtreecommitdiffstats
path: root/crypto/rc4
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2009-02-11 10:01:36 +0000
committerAndy Polyakov <appro@openssl.org>2009-02-11 10:01:36 +0000
commit13c3a1defad3837192a8dbfe41074666ed4eb9c1 (patch)
tree1c0f1eaa461fe914919ad0d5c4cf51fac7b06fff /crypto/rc4
parentaaa29f9e8354aa8a3a8d33474a28b70a2010ad30 (diff)
RC4 for s390x.
Diffstat (limited to 'crypto/rc4')
-rw-r--r--crypto/rc4/Makefile3
-rw-r--r--crypto/rc4/asm/rc4-s390x.pl205
2 files changed, 208 insertions, 0 deletions
diff --git a/crypto/rc4/Makefile b/crypto/rc4/Makefile
index f932a89035..264451a213 100644
--- a/crypto/rc4/Makefile
+++ b/crypto/rc4/Makefile
@@ -50,6 +50,9 @@ rc4-x86_64.s: asm/rc4-x86_64.pl
rc4-ia64.S: asm/rc4-ia64.pl
$(PERL) asm/rc4-ia64.pl $(CFLAGS) > $@
+rc4-s390x.s: asm/rc4-s390x.pl
+ $(PERL) asm/rc4-s390x.pl > $@
+
rc4-ia64.s: rc4-ia64.S
@case `awk '/^#define RC4_INT/{print$$NF}' $(TOP)/include/openssl/opensslconf.h` in \
int) set -x; $(CC) $(CFLAGS) -DSZ=4 -E rc4-ia64.S > $@ ;; \
diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl
new file mode 100644
index 0000000000..4366c4fc1a
--- /dev/null
+++ b/crypto/rc4/asm/rc4-s390x.pl
@@ -0,0 +1,205 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# February 2009
+#
+# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to avoid
+# arithmetic instructions, but adhere to load and load address in
+# order to minimize Address Generation Interlock.
+
+$rp="%r14";
+$sp="%r15";
+$code=<<___;
+.text
+
+___
+
+# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
+{
+$acc="%r0";
+$cnt="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$out="%r5";
+
+@XX=("%r6","%r7");
+@TX=("%r8","%r9");
+$YY="%r10";
+$TY="%r11";
+
+$code.=<<___;
+.globl RC4
+.type RC4,\@function
+.align 64
+RC4:
+ stmg %r6,%r11,48($sp)
+ llgc $XX[0],0($key)
+ llgc $YY,1($key)
+ la $XX[0],1($XX[0])
+ llgcr $XX[0],$XX[0]
+ llgc $TX[0],2($XX[0],$key)
+ srlg $cnt,$len,3
+ ltgr $cnt,$cnt
+ jz .Lshort
+ j .Loop8
+
+.align 64
+.Loop8:
+___
+for ($i=0;$i<8;$i++) {
+$code.=<<___;
+ la $YY,0($YY,$TX[0]) # $i
+ llgcr $YY,$YY
+ la $XX[1],1($XX[0])
+ llgcr $XX[1],$XX[1]
+___
+$code.=<<___ if ($i>1);
+ sllg $acc,$acc,8
+ ic $acc,2($TY,$key)
+___
+$code.=<<___ if ($i==1);
+ llgc $acc,2($TY,$key)
+___
+$code.=<<___;
+ llgc $TY,2($YY,$key)
+ stc $TX[0],2($YY,$key)
+ llgc $TX[1],2($XX[1],$key)
+ stc $TY,2($XX[0],$key)
+ cr $XX[1],$YY
+ jne .Lcmov$i
+ la $TX[1],0($TX[0])
+.Lcmov$i:
+ la $TY,0($TY,$TX[0])
+ llgcr $TY,$TY
+___
+push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
+}
+
+$code.=<<___;
+ lg $TX[1],0($inp)
+ sllg $acc,$acc,8
+ la $inp,8($inp)
+ ic $acc,2($TY,$key)
+ xgr $acc,$TX[1]
+ stg $acc,0($out)
+ la $out,8($out)
+ brct $cnt,.Loop8
+
+.Lshort:
+ lghi $acc,7
+ ngr $len,$acc
+ jz .Lexit
+ j .Loop1
+
+.align 16
+.Loop1:
+ la $YY,0($YY,$TX[0])
+ llgcr $YY,$YY
+ llgc $TY,2($YY,$key)
+ stc $TX[0],2($YY,$key)
+ stc $TY,2($XX[0],$key)
+ la $TY,0($TY,$TX[0])
+ llgcr $TY,$TY
+ la $XX[0],1($XX[0])
+ llgcr $XX[0],$XX[0]
+ llgc $TY,2($TY,$key)
+ llgc $TX[0],2($XX[0],$key)
+ llgc $acc,0($inp)
+ la $inp,1($inp)
+ xr $acc,$TY
+ stc $acc,0($out)
+ la $out,1($out)
+ brct $len,.Loop1
+
+.Lexit:
+ ahi $XX[0],-1
+ stc $XX[0],0($key)
+ stc $YY,1($key)
+ lmg %r6,%r11,48($sp)
+ br $rp
+.size RC4,.-RC4
+.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+
+___
+}
+
+# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
+{
+$cnt="%r0";
+$idx="%r1";
+$key="%r2";
+$len="%r3";
+$inp="%r4";
+$acc="%r5";
+$dat="%r6";
+$ikey="%r7";
+$iinp="%r8";
+
+$code.=<<___;
+.globl RC4_set_key
+.type RC4_set_key,\@function
+.align 64
+RC4_set_key:
+ stmg %r6,%r8,48($sp)
+ lhi $cnt,256
+ la $idx,0(%r0)
+ sth $idx,0($key)
+.align 4
+.L1stloop:
+ stc $idx,2($idx,$key)
+ la $idx,1($idx)
+ brct $cnt,.L1stloop
+
+ lghi $ikey,-256
+ lr $cnt,$len
+ la $iinp,0(%r0)
+ la $idx,0(%r0)
+.align 16
+.L2ndloop:
+ llgc $acc,2+256($ikey,$key)
+ llgc $dat,0($iinp,$inp)
+ la $idx,0($idx,$acc)
+ la $ikey,1($ikey)
+ la $idx,0($idx,$dat)
+ la $iinp,1($iinp)
+ llgcr $idx,$idx
+ tml $ikey,255
+ llgc $dat,2($idx,$key)
+ stc $dat,2+256-1($ikey,$key)
+ stc $acc,2($idx,$key)
+ jz .Ldone
+ brct $cnt,.L2ndloop
+ lr $cnt,$len
+ la $iinp,0(%r0)
+ j .L2ndloop
+.Ldone:
+ lmg %r6,%r8,48($sp)
+ br $rp
+.size RC4_set_key,.-RC4_set_key
+
+___
+}
+
+# const char *RC4_options()
+$code.=<<___;
+.globl RC4_options
+.type RC4_options,\@function
+.align 16
+RC4_options:
+ larl %r2,.Loptions
+ br %r14
+.size RC4_options,.-RC4_options
+.section .rodata
+.Loptions:
+.align 8
+.string "rc4(8x,char)"
+___
+
+print $code;