summaryrefslogtreecommitdiffstats
path: root/crypto/sha/asm
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2011-05-27 13:32:34 +0000
committerAndy Polyakov <appro@openssl.org>2011-05-27 13:32:34 +0000
commit6715034002f2d7831b234c50a2a072320905cafe (patch)
treeba4c1a67183fe2bacc2d8fd5e5975822fc9b9cb9 /crypto/sha/asm
parent0ca9a483afab59f56ae4412454feac01d96c38ba (diff)
PPC assembler pack: adhere closer to ABI specs, add PowerOpen traceback data.
Diffstat (limited to 'crypto/sha/asm')
-rwxr-xr-xcrypto/sha/asm/sha1-ppc.pl83
-rwxr-xr-xcrypto/sha/asm/sha512-ppc.pl114
2 files changed, 101 insertions, 96 deletions
diff --git a/crypto/sha/asm/sha1-ppc.pl b/crypto/sha/asm/sha1-ppc.pl
index dcd0fcdfcf..2140dd2f8d 100755
--- a/crypto/sha/asm/sha1-ppc.pl
+++ b/crypto/sha/asm/sha1-ppc.pl
@@ -24,12 +24,14 @@ $flavour = shift;
if ($flavour =~ /64/) {
$SIZE_T =8;
+ $LRSAVE =2*$SIZE_T;
$UCMP ="cmpld";
$STU ="stdu";
$POP ="ld";
$PUSH ="std";
} elsif ($flavour =~ /32/) {
$SIZE_T =4;
+ $LRSAVE =$SIZE_T;
$UCMP ="cmplw";
$STU ="stwu";
$POP ="lwz";
@@ -43,7 +45,8 @@ die "can't locate ppc-xlate.pl";
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
-$FRAME=24*$SIZE_T;
+$FRAME=24*$SIZE_T+64;
+$LOCALS=6*$SIZE_T;
$K ="r0";
$sp ="r1";
@@ -162,9 +165,8 @@ $code=<<___;
.globl .sha1_block_data_order
.align 4
.sha1_block_data_order:
+ $STU $sp,-$FRAME($sp)
mflr r0
- $STU $sp,`-($FRAME+64)`($sp)
- $PUSH r0,`$FRAME-$SIZE_T*18`($sp)
$PUSH r15,`$FRAME-$SIZE_T*17`($sp)
$PUSH r16,`$FRAME-$SIZE_T*16`($sp)
$PUSH r17,`$FRAME-$SIZE_T*15`($sp)
@@ -182,6 +184,7 @@ $code=<<___;
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+ $PUSH r0,`$FRAME+$LRSAVE`($sp)
lwz $A,0($ctx)
lwz $B,4($ctx)
lwz $C,8($ctx)
@@ -192,37 +195,14 @@ $code=<<___;
Laligned:
mtctr $num
bl Lsha1_block_private
-Ldone:
- $POP r0,`$FRAME-$SIZE_T*18`($sp)
- $POP r15,`$FRAME-$SIZE_T*17`($sp)
- $POP r16,`$FRAME-$SIZE_T*16`($sp)
- $POP r17,`$FRAME-$SIZE_T*15`($sp)
- $POP r18,`$FRAME-$SIZE_T*14`($sp)
- $POP r19,`$FRAME-$SIZE_T*13`($sp)
- $POP r20,`$FRAME-$SIZE_T*12`($sp)
- $POP r21,`$FRAME-$SIZE_T*11`($sp)
- $POP r22,`$FRAME-$SIZE_T*10`($sp)
- $POP r23,`$FRAME-$SIZE_T*9`($sp)
- $POP r24,`$FRAME-$SIZE_T*8`($sp)
- $POP r25,`$FRAME-$SIZE_T*7`($sp)
- $POP r26,`$FRAME-$SIZE_T*6`($sp)
- $POP r27,`$FRAME-$SIZE_T*5`($sp)
- $POP r28,`$FRAME-$SIZE_T*4`($sp)
- $POP r29,`$FRAME-$SIZE_T*3`($sp)
- $POP r30,`$FRAME-$SIZE_T*2`($sp)
- $POP r31,`$FRAME-$SIZE_T*1`($sp)
- mtlr r0
- addi $sp,$sp,`$FRAME+64`
- blr
-___
+ b Ldone
-# PowerPC specification allows an implementation to be ill-behaved
-# upon unaligned access which crosses page boundary. "Better safe
-# than sorry" principle makes me treat it specially. But I don't
-# look for particular offending word, but rather for 64-byte input
-# block which crosses the boundary. Once found that block is aligned
-# and hashed separately...
-$code.=<<___;
+; PowerPC specification allows an implementation to be ill-behaved
+; upon unaligned access which crosses page boundary. "Better safe
+; than sorry" principle makes me treat it specially. But I don't
+; look for particular offending word, but rather for 64-byte input
+; block which crosses the boundary. Once found that block is aligned
+; and hashed separately...
.align 4
Lunaligned:
subfic $t1,$inp,4096
@@ -237,7 +217,7 @@ Lunaligned:
Lcross_page:
li $t1,16
mtctr $t1
- addi r20,$sp,$FRAME ; spot below the frame
+ addi r20,$sp,$LOCALS ; spot within the frame
Lmemcpy:
lbz r16,0($inp)
lbz r17,1($inp)
@@ -251,15 +231,40 @@ Lmemcpy:
addi r20,r20,4
bdnz Lmemcpy
- $PUSH $inp,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH $inp,`$FRAME-$SIZE_T*18`($sp)
li $t1,1
- addi $inp,$sp,$FRAME
+ addi $inp,$sp,$LOCALS
mtctr $t1
bl Lsha1_block_private
- $POP $inp,`$FRAME-$SIZE_T*19`($sp)
+ $POP $inp,`$FRAME-$SIZE_T*18`($sp)
addic. $num,$num,-1
bne- Lunaligned
- b Ldone
+
+Ldone:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+ $POP r17,`$FRAME-$SIZE_T*15`($sp)
+ $POP r18,`$FRAME-$SIZE_T*14`($sp)
+ $POP r19,`$FRAME-$SIZE_T*13`($sp)
+ $POP r20,`$FRAME-$SIZE_T*12`($sp)
+ $POP r21,`$FRAME-$SIZE_T*11`($sp)
+ $POP r22,`$FRAME-$SIZE_T*10`($sp)
+ $POP r23,`$FRAME-$SIZE_T*9`($sp)
+ $POP r24,`$FRAME-$SIZE_T*8`($sp)
+ $POP r25,`$FRAME-$SIZE_T*7`($sp)
+ $POP r26,`$FRAME-$SIZE_T*6`($sp)
+ $POP r27,`$FRAME-$SIZE_T*5`($sp)
+ $POP r28,`$FRAME-$SIZE_T*4`($sp)
+ $POP r29,`$FRAME-$SIZE_T*3`($sp)
+ $POP r30,`$FRAME-$SIZE_T*2`($sp)
+ $POP r31,`$FRAME-$SIZE_T*1`($sp)
+ mtlr r0
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,18,3,0
+ .long 0
___
# This is private block function, which uses tailored calling
@@ -309,6 +314,8 @@ $code.=<<___;
addi $inp,$inp,`16*4`
bdnz- Lsha1_block_private
blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
___
$code.=<<___;
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
diff --git a/crypto/sha/asm/sha512-ppc.pl b/crypto/sha/asm/sha512-ppc.pl
index 768a6a6fad..6b44a68e59 100755
--- a/crypto/sha/asm/sha512-ppc.pl
+++ b/crypto/sha/asm/sha512-ppc.pl
@@ -40,6 +40,7 @@ $output =shift;
if ($flavour =~ /64/) {
$SIZE_T=8;
+ $LRSAVE=2*$SIZE_T;
$STU="stdu";
$UCMP="cmpld";
$SHL="sldi";
@@ -47,6 +48,7 @@ if ($flavour =~ /64/) {
$PUSH="std";
} elsif ($flavour =~ /32/) {
$SIZE_T=4;
+ $LRSAVE=$SIZE_T;
$STU="stwu";
$UCMP="cmplw";
$SHL="slwi";
@@ -87,7 +89,8 @@ if ($output =~ /512/) {
$SHR="srwi";
}
-$FRAME=32*$SIZE_T;
+$FRAME=32*$SIZE_T+16*$SZ;
+$LOCALS=6*$SIZE_T;
$sp ="r1";
$toc="r2";
@@ -179,13 +182,12 @@ $code=<<___;
.globl $func
.align 6
$func:
+ $STU $sp,-$FRAME($sp)
mflr r0
- $STU $sp,`-($FRAME+16*$SZ)`($sp)
$SHL $num,$num,`log(16*$SZ)/log(2)`
$PUSH $ctx,`$FRAME-$SIZE_T*22`($sp)
- $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
$PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
$PUSH r13,`$FRAME-$SIZE_T*19`($sp)
$PUSH r14,`$FRAME-$SIZE_T*18`($sp)
@@ -206,6 +208,7 @@ $func:
$PUSH r29,`$FRAME-$SIZE_T*3`($sp)
$PUSH r30,`$FRAME-$SIZE_T*2`($sp)
$PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+ $PUSH r0,`$FRAME+$LRSAVE`($sp)
$LD $A,`0*$SZ`($ctx)
mr $inp,r4 ; incarnate $inp
@@ -217,7 +220,7 @@ $func:
$LD $G,`6*$SZ`($ctx)
$LD $H,`7*$SZ`($ctx)
- b LPICmeup
+ bl LPICmeup
LPICedup:
andi. r0,$inp,3
bne Lunaligned
@@ -226,40 +229,14 @@ Laligned:
$PUSH $num,`$FRAME-$SIZE_T*24`($sp) ; end pointer
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
bl Lsha2_block_private
-Ldone:
- $POP r0,`$FRAME-$SIZE_T*21`($sp)
- $POP $toc,`$FRAME-$SIZE_T*20`($sp)
- $POP r13,`$FRAME-$SIZE_T*19`($sp)
- $POP r14,`$FRAME-$SIZE_T*18`($sp)
- $POP r15,`$FRAME-$SIZE_T*17`($sp)
- $POP r16,`$FRAME-$SIZE_T*16`($sp)
- $POP r17,`$FRAME-$SIZE_T*15`($sp)
- $POP r18,`$FRAME-$SIZE_T*14`($sp)
- $POP r19,`$FRAME-$SIZE_T*13`($sp)
- $POP r20,`$FRAME-$SIZE_T*12`($sp)
- $POP r21,`$FRAME-$SIZE_T*11`($sp)
- $POP r22,`$FRAME-$SIZE_T*10`($sp)
- $POP r23,`$FRAME-$SIZE_T*9`($sp)
- $POP r24,`$FRAME-$SIZE_T*8`($sp)
- $POP r25,`$FRAME-$SIZE_T*7`($sp)
- $POP r26,`$FRAME-$SIZE_T*6`($sp)
- $POP r27,`$FRAME-$SIZE_T*5`($sp)
- $POP r28,`$FRAME-$SIZE_T*4`($sp)
- $POP r29,`$FRAME-$SIZE_T*3`($sp)
- $POP r30,`$FRAME-$SIZE_T*2`($sp)
- $POP r31,`$FRAME-$SIZE_T*1`($sp)
- mtlr r0
- addi $sp,$sp,`$FRAME+16*$SZ`
- blr
-___
+ b Ldone
-# PowerPC specification allows an implementation to be ill-behaved
-# upon unaligned access which crosses page boundary. "Better safe
-# than sorry" principle makes me treat it specially. But I don't
-# look for particular offending word, but rather for the input
-# block which crosses the boundary. Once found that block is aligned
-# and hashed separately...
-$code.=<<___;
+; PowerPC specification allows an implementation to be ill-behaved
+; upon unaligned access which crosses page boundary. "Better safe
+; than sorry" principle makes me treat it specially. But I don't
+; look for particular offending word, but rather for the input
+; block which crosses the boundary. Once found that block is aligned
+; and hashed separately...
.align 4
Lunaligned:
subfic $t1,$inp,4096
@@ -278,7 +255,7 @@ Lunaligned:
Lcross_page:
li $t1,`16*$SZ/4`
mtctr $t1
- addi r20,$sp,$FRAME ; aligned spot below the frame
+ addi r20,$sp,$LOCALS ; aligned spot below the frame
Lmemcpy:
lbz r16,0($inp)
lbz r17,1($inp)
@@ -293,8 +270,8 @@ Lmemcpy:
bdnz Lmemcpy
$PUSH $inp,`$FRAME-$SIZE_T*26`($sp) ; save real inp
- addi $t1,$sp,`$FRAME+16*$SZ` ; fictitious end pointer
- addi $inp,$sp,$FRAME ; fictitious inp pointer
+ addi $t1,$sp,`$LOCALS+16*$SZ` ; fictitious end pointer
+ addi $inp,$sp,$LOCALS ; fictitious inp pointer
$PUSH $num,`$FRAME-$SIZE_T*25`($sp) ; save real num
$PUSH $t1,`$FRAME-$SIZE_T*24`($sp) ; end pointer
$PUSH $inp,`$FRAME-$SIZE_T*23`($sp) ; inp pointer
@@ -303,10 +280,36 @@ Lmemcpy:
$POP $num,`$FRAME-$SIZE_T*25`($sp) ; restore real num
addic. $num,$num,`-16*$SZ` ; num--
bne- Lunaligned
- b Ldone
-___
-$code.=<<___;
+Ldone:
+ $POP r0,`$FRAME+$LRSAVE`($sp)
+ $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+ $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+ $POP r17,`$FRAME-$SIZE_T*15`($sp)
+ $POP r18,`$FRAME-$SIZE_T*14`($sp)
+ $POP r19,`$FRAME-$SIZE_T*13`($sp)
+ $POP r20,`$FRAME-$SIZE_T*12`($sp)
+ $POP r21,`$FRAME-$SIZE_T*11`($sp)
+ $POP r22,`$FRAME-$SIZE_T*10`($sp)
+ $POP r23,`$FRAME-$SIZE_T*9`($sp)
+ $POP r24,`$FRAME-$SIZE_T*8`($sp)
+ $POP r25,`$FRAME-$SIZE_T*7`($sp)
+ $POP r26,`$FRAME-$SIZE_T*6`($sp)
+ $POP r27,`$FRAME-$SIZE_T*5`($sp)
+ $POP r28,`$FRAME-$SIZE_T*4`($sp)
+ $POP r29,`$FRAME-$SIZE_T*3`($sp)
+ $POP r30,`$FRAME-$SIZE_T*2`($sp)
+ $POP r31,`$FRAME-$SIZE_T*1`($sp)
+ mtlr r0
+ addi $sp,$sp,$FRAME
+ blr
+ .long 0
+ .byte 0,12,4,1,0x80,18,3,0
+ .long 0
+
.align 4
Lsha2_block_private:
___
@@ -372,6 +375,8 @@ $code.=<<___;
$ST $H,`7*$SZ`($ctx)
bne Lsha2_block_private
blr
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
___
# Ugly hack here, because PPC assembler syntax seem to vary too
@@ -379,22 +384,15 @@ ___
$code.=<<___;
.align 6
LPICmeup:
- bl LPIC
- addi $Tbl,$Tbl,`64-4` ; "distance" between . and last nop
- b LPICedup
- nop
- nop
- nop
- nop
- nop
-LPIC: mflr $Tbl
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $Tbl ; vvvvvv "distance" between . and 1st data entry
+ addi $Tbl,$Tbl,`64-8`
+ mtlr r0
blr
- nop
- nop
- nop
- nop
- nop
- nop
+ .long 0
+ .byte 0,12,0x14,0,0,0,0,0
+ .space `64-9*4`
___
$code.=<<___ if ($SZ==8);
.long 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd