summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2005-07-18 17:11:13 +0000
committerAndy Polyakov <appro@openssl.org>2005-07-18 17:11:13 +0000
commit4ac210c16aec67d79fb1ee2cd67ae1fd25e61130 (patch)
treec1cac497dca43c98849302e99f8756430468bd1f
parent02703c74a4c10f3848cdd5e7ff5196ab45c7e67e (diff)
This update implements following improvements.
1. Original submission required minor modification to RC4_set_key, which we don't want to tolerate and therefore we fix assembler instead. 2. Eliminate remaining byte-order dependence [look for RC4_BIG_ENDIAN]. 3. Eliminate logical error [when key->x is referred prior key is verified]. 4. HP-UX assembler puked on MODSCHED_RC4 macro with "syntax error," macro has to be splitted in two. 5. Deploy parallel compare in function prologue. 6. Eliminate redundant instuctions and nops. 7. Eliminate assembler warnings.
-rw-r--r--crypto/rc4/asm/rc4-ia64.pl141
1 files changed, 54 insertions, 87 deletions
diff --git a/crypto/rc4/asm/rc4-ia64.pl b/crypto/rc4/asm/rc4-ia64.pl
index 330b95f7dd..988eec1e94 100644
--- a/crypto/rc4/asm/rc4-ia64.pl
+++ b/crypto/rc4/asm/rc4-ia64.pl
@@ -194,21 +194,13 @@ $threshold = (3 * ($phases * ($unroll_count + 1)) + 7);
sub I {
local *code = shift;
local $format = shift;
- local $a0 = shift;
- local $a1 = shift;
- local $a2 = shift;
- local $a3 = shift;
- $code .= sprintf ("\t\t".$format."\n", $a0, $a1, $a2, $a3);
+ $code .= sprintf ("\t\t".$format."\n", @_);
}
sub P {
local *code = shift;
local $format = shift;
- local $a0 = shift;
- local $a1 = shift;
- local $a2 = shift;
- local $a3 = shift;
- $code .= sprintf ($format."\n", $a0, $a1, $a2, $a3);
+ $code .= sprintf ($format."\n", @_);
}
sub STOP {
@@ -239,6 +231,10 @@ sub emit_body {
___
if (($p & 0xf) == 0) {
+ $c.="#ifdef RC4_BIG_ENDIAN\n";
+ &I(\$c,"shr.u OutWord[%u] = OutWord[%u], 32;;",
+ $iw1 % $NOutWord, $iw1 % $NOutWord);
+ $c.="#endif\n";
&I(\$c, "st4 [OutPtr] = OutWord[%u], 4", $iw1 % $NOutWord);
return;
}
@@ -311,6 +307,7 @@ ___
&I(\$bypass, "add J = J, SI[%u]", $i1 % $NSI);
&I(\$bypass, "mov SI[%u] = SI[%u]", $i0 % $NSI, $i1 % $NSI);
&I(\$bypass, "br.sptk.many .rc4Resume%u\n", $label);
+ &I(\$bypass, ";;");
}
}
@@ -394,10 +391,11 @@ $code=<<___;
/* Define a macro for the bit number of the n-th byte: */
-#ifdef L_ENDIAN
-# define BYTE_POS(n) (8 * (n))
-#else
+#if defined(_HPUX_SOURCE) || defined(B_ENDIAN)
+# define RC4_BIG_ENDIAN
# define BYTE_POS(n) (56 - (8 * (n)))
+#else
+# define BYTE_POS(n) (8 * (n))
#endif
/*
@@ -406,8 +404,9 @@ $code=<<___;
will never be taken since regardless of the number of bytes because
the epilogue count is 4.
*/
-
-#define MODSCHED_RC4(label) \\
+/* MODSCHED_RC4 macro was split to _PROLOGUE and _LOOP, because HP-UX
+ assembler failed on original macro with syntax error. <appro> */
+#define MODSCHED_RC4_PROLOGUE \\
{ \\
ld1 Data[0] = [InPtr], 1; \\
add IFinal = 1, I[1]; \\
@@ -421,8 +420,9 @@ $code=<<___;
{ \\
add J = J, SI[0]; \\
zxt1 I[0] = IFinal; \\
- br.cexit.spnt.few label; /* never taken */ \\
- } ;; \\
+ br.cexit.spnt.few .+16; /* never taken */ \\
+ } ;;
+#define MODSCHED_RC4_LOOP(label) \\
label: \\
{ .mmi; \\
(pComputeI) ld1 Data[0] = [InPtr], 1; \\
@@ -476,63 +476,42 @@ RC4:
OutWord[2]
.rotp pPhase[4]
-#ifdef _LP64
- add InPrefetch = 0, InputBuffer
- nop 0x0
- }
-#else
- ADDP InputBuffer = 0, InputBuffer
- ADDP StateTable = 0, StateTable
+ ADDP InPrefetch = 0, InputBuffer
+ ADDP KTable = 0, StateTable
}
- ;;
{
- ADDP InPrefetch = 0, InputBuffer
- ADDP OutputBuffer = 0, OutputBuffer
- nop 0x0
+ .mmi
+ ADDP InPtr = 0, InputBuffer
+ ADDP OutPtr = 0, OutputBuffer
+ mov RetVal = r0
}
-#endif
;;
{
.mmi
lfetch.nt1 [InPrefetch], 0x80
- LKEY I[1] = [StateTable], SZ
- mov OutPrefetch = OutputBuffer
- } ;;
- {
- .mii
- nop 0x0
- nop 0x0
- mov RetVal = r0
+ ADDP OutPrefetch = 0, OutputBuffer
}
{ // Return 0 if the input length is nonsensical
.mib
- nop 0x0
- cmp.ge L_NOK, L_OK = r0, DataLen
+ ADDP StateTable = 0, StateTable
+ cmp.ge.unc L_NOK, L_OK = r0, DataLen
(L_NOK) br.ret.sptk.few rp
}
;;
{
.mib
- nop 0x0
- cmp.eq L_NOK, L_OK = r0, InputBuffer
- (L_NOK) br.ret.sptk.few rp
+ cmp.eq.or L_NOK, L_OK = r0, InPtr
+ cmp.eq.or L_NOK, L_OK = r0, OutPtr
+ nop 0x0
}
- ;;
{
- .mib
- nop 0x0
- cmp.eq L_NOK, L_OK = r0, OutputBuffer
+ .mib
+ cmp.eq.or L_NOK, L_OK = r0, StateTable
+ nop 0x0
(L_NOK) br.ret.sptk.few rp
}
;;
- {
- .mib
- nop 0x0
- cmp.eq L_NOK, L_OK = r0, StateTable
- (L_NOK) br.ret.sptk.few rp
- }
-
-
+ LKEY I[1] = [KTable], SZ
/* Prefetch the state-table. It contains 256 elements of size SZ */
#if SZ == 1
@@ -568,8 +547,12 @@ RC4:
lfetch.fault.nt1 [tmp0], -256 // 3
lfetch.fault.nt1 [tmp1], -256;;
#endif
+ {
+ .mii
lfetch.fault.nt1 [tmp0] // 1
-
+ add I[1]=1,I[1];;
+ zxt1 I[1]=I[1]
+ }
{
.mmi
lfetch.nt1 [InPrefetch], 0x80
@@ -580,19 +563,13 @@ RC4:
{
.mmi
lfetch.excl.nt1 [OutPrefetch], 0x80
- LKEY J = [StateTable], SZ
- ADDP EndPtr = DataLen, InputBuffer
+ LKEY J = [KTable], SZ
+ ADDP EndPtr = DataLen, InPtr
} ;;
{
.mmi
- mov InPtr = InputBuffer
- mov OutPtr = OutputBuffer
ADDP EndPtr = -1, EndPtr // Make it point to
// last data byte.
- } ;;
- {
- .mii
- mov KTable = StateTable
mov One = 1
.save ar.lc, LCSave
mov LCSave = ar.lc
@@ -614,6 +591,7 @@ RC4:
} ;;
{
.mmb
+.pred.rel "mutex",pUnaligned,pAligned
(pUnaligned) add Remainder = -1, Remainder
(pAligned) sub Remainder = EndPtr, InPtr
(pAligned) br.cond.dptk.many .rc4Aligned
@@ -628,7 +606,8 @@ RC4:
/* Do the initial few bytes via the compact, modulo-scheduled loop
until the output pointer is 8-byte-aligned. */
- MODSCHED_RC4(.RC4AlignLoop)
+ MODSCHED_RC4_PROLOGUE
+ MODSCHED_RC4_LOOP(.RC4AlignLoop)
{
.mib
@@ -671,13 +650,7 @@ RC4:
} ;;
{
.mmi
- getf.sig LoopCount = f6 // M2 5 cyc
- nop 0x0
- nop 0x0
- } ;;
- {
- .mmi
- nop 0x0
+ getf.sig LoopCount = f6;; // M2 5 cyc
nop 0x0
shr.u LoopCount = LoopCount, 4
} ;;
@@ -747,32 +720,26 @@ $code.=<<___;
/* Do the remaining bytes via the compact, modulo-scheduled loop */
- MODSCHED_RC4(.RC4RestLoop)
-
- {
- .mmi
- nop 0x0
- nop 0x0
- zxt1 IFinal = IFinal
- } ;;
+ MODSCHED_RC4_PROLOGUE
+ MODSCHED_RC4_LOOP(.RC4RestLoop)
.rc4Complete:
{
.mmi
- ADDP KTable = -2*SZ, KTable ;;
- SKEY [KTable] = IFinal, SZ
+ add KTable = -SZ, KTable
+ add IFinal = -1, IFinal
mov ar.lc = LCSave
} ;;
{
.mii
- nop 0x0
- nop 0x0
- add RetVal = 1, r0
- }
+ SKEY [KTable] = J,-SZ
+ zxt1 IFinal = IFinal
+ mov pr = PRSave, 0x1FFFF
+ } ;;
{
.mib
- SKEY [KTable] = J
- mov pr = PRSave, 0x1FFFF
+ SKEY [KTable] = IFinal
+ add RetVal = 1, r0
br.ret.sptk.few rp
} ;;
___