From 9833757b5d8683cc2e92ab45115794bc2bd9e66c Mon Sep 17 00:00:00 2001
From: Andy Polyakov
Date: Mon, 14 Nov 2011 20:47:22 +0000
Subject: s390x assembler pack update from HEAD.

---
 crypto/rc4/asm/rc4-s390x.pl | 49 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 39 insertions(+), 10 deletions(-)

(limited to 'crypto/rc4')

diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl
index c7ed59510b..1aa754820c 100644
--- a/crypto/rc4/asm/rc4-s390x.pl
+++ b/crypto/rc4/asm/rc4-s390x.pl
@@ -13,6 +13,29 @@
 # "cluster" Address Generation Interlocks, so that one pipeline stall
 # resolves several dependencies.
 
+# November 2010.
+#
+# Adapt for -m31 build. If the kernel supports what's called "highgprs"
+# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
+# instructions and achieve "64-bit" performance even in 31-bit legacy
+# application context. The feature is not specific to any particular
+# processor, as long as it's "z-CPU". The latter implies that the code
+# remains z/Architecture specific. On z990 it was measured to perform
+# 50% better than code generated by gcc 4.3.
+
+$flavour = shift;	# first argument; a match on /3[12]/ selects the 4-byte SIZE_T build
+
+if ($flavour =~ /3[12]/) {
+	$SIZE_T=4;	# scales the register save-area offset (6*$SIZE_T) below
+	$g="";		# suffix for stm/lm: plain 32-bit forms
+} else {
+	$SIZE_T=8;
+	$g="g";		# suffix for stm/lm: stmg/lmg 64-bit forms
+}
+
+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}	# skip arguments until one looks like a file name
+open STDOUT,">$output" or die "can't open $output: $!" if ($output);	# keep inherited STDOUT when no file was named
+
 $rp="%r14";
 $sp="%r15";
 $code=<<___;
@@ -39,7 +62,12 @@ $code.=<<___;
 .type RC4,\@function
 .align 64
 RC4:
-	stmg %r6,%r11,48($sp)
+	stm${g} %r6,%r11,6*$SIZE_T($sp)
+___
+$code.=<<___ if ($flavour =~ /3[12]/);
+	llgfr $len,$len
+___
+$code.=<<___;
 	llgc $XX[0],0($key)
 	llgc $YY,1($key)
 	la $XX[0],1($XX[0])
@@ -90,7 +118,7 @@ $code.=<<___;
 	xgr $acc,$TX[1]
 	stg $acc,0($out)
 	la $out,8($out)
-	brct $cnt,.Loop8
+	brctg $cnt,.Loop8
 
 .Lshort:
 	lghi $acc,7
@@ -122,7 +150,7 @@ $code.=<<___;
 	ahi $XX[0],-1
 	stc $XX[0],0($key)
 	stc $YY,1($key)
-	lmg %r6,%r11,48($sp)
+	lm${g} %r6,%r11,6*$SIZE_T($sp)
 	br $rp
 .size RC4,.-RC4
 .string "RC4 for s390x, CRYPTOGAMS by "
@@ -130,7 +158,7 @@ $code.=<<___;
 ___
 }
 
-# void private_RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
+# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
 {
 $cnt="%r0";
 $idx="%r1";
@@ -143,11 +171,11 @@ $ikey="%r7";
 $iinp="%r8";
 
 $code.=<<___;
-.globl private_RC4_set_key
-.type private_RC4_set_key,\@function
+.globl RC4_set_key
+.type RC4_set_key,\@function
 .align 64
-private_RC4_set_key:
-	stmg %r6,%r8,48($sp)
+RC4_set_key:
+	stm${g} %r6,%r8,6*$SIZE_T($sp)
 	lhi $cnt,256
 	la $idx,0(%r0)
 	sth $idx,0($key)
@@ -180,9 +208,9 @@ private_RC4_set_key:
 	la $iinp,0(%r0)
 	j .L2ndloop
 .Ldone:
-	lmg %r6,%r8,48($sp)
+	lm${g} %r6,%r8,6*$SIZE_T($sp)
 	br $rp
-.size private_RC4_set_key,.-private_RC4_set_key
+.size RC4_set_key,.-RC4_set_key
 ___
 }
 
@@ -203,3 +231,4 @@ RC4_options:
 ___
 
 print $code;
+close STDOUT or die "error closing STDOUT: $!"; # force flush; a failed flush must not go unnoticed
-- 
cgit v1.2.3