From e822c756b66024d49ab936bf77b745206660fcd2 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Mon, 29 Nov 2010 20:52:43 +0000 Subject: s390x assembler pack: adapt for -m31 build, see commentary in Configure for more details. --- crypto/rc4/asm/rc4-s390x.pl | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) (limited to 'crypto/rc4') diff --git a/crypto/rc4/asm/rc4-s390x.pl b/crypto/rc4/asm/rc4-s390x.pl index f26c515e78..1aa754820c 100644 --- a/crypto/rc4/asm/rc4-s390x.pl +++ b/crypto/rc4/asm/rc4-s390x.pl @@ -13,6 +13,26 @@ # "cluster" Address Generation Interlocks, so that one pipeline stall # resolves several dependencies. +# November 2010. +# +# Adapt for -m31 build. If kernel supports what's called "highgprs" +# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit +# instructions and achieve "64-bit" performance even in 31-bit legacy +# application context. The feature is not specific to any particular +# processor, as long as it's "z-CPU". Latter implies that the code +# remains z/Architecture specific. On z990 it was measured to perform +# 50% better than code generated by gcc 4.3. + +$flavour = shift; + +if ($flavour =~ /3[12]/) { + $SIZE_T=4; + $g=""; +} else { + $SIZE_T=8; + $g="g"; +} + while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} open STDOUT,">$output"; @@ -42,7 +62,12 @@ $code.=<<___; .type RC4,\@function .align 64 RC4: - stmg %r6,%r11,48($sp) + stm${g} %r6,%r11,6*$SIZE_T($sp) +___ +$code.=<<___ if ($flavour =~ /3[12]/); + llgfr $len,$len +___ +$code.=<<___; llgc $XX[0],0($key) llgc $YY,1($key) la $XX[0],1($XX[0]) @@ -93,7 +118,7 @@ $code.=<<___; xgr $acc,$TX[1] stg $acc,0($out) la $out,8($out) - brct $cnt,.Loop8 + brctg $cnt,.Loop8 .Lshort: lghi $acc,7 @@ -125,7 +150,7 @@ $code.=<<___; ahi $XX[0],-1 stc $XX[0],0($key) stc $YY,1($key) - lmg %r6,%r11,48($sp) + lm${g} %r6,%r11,6*$SIZE_T($sp) br $rp .size RC4,.-RC4 .string "RC4 for s390x, CRYPTOGAMS by " @@ -150,7 +175,7 @@ $code.=<<___; .type RC4_set_key,\@function .align 64 RC4_set_key: - stmg %r6,%r8,48($sp) + stm${g} %r6,%r8,6*$SIZE_T($sp) lhi $cnt,256 la $idx,0(%r0) sth $idx,0($key) @@ -183,7 +208,7 @@ RC4_set_key: la $iinp,0(%r0) j .L2ndloop .Ldone: - lmg %r6,%r8,48($sp) + lm${g} %r6,%r8,6*$SIZE_T($sp) br $rp .size RC4_set_key,.-RC4_set_key -- cgit v1.2.3