SPARCv9 assembly pack: harmonize ABI handling, so that it is handled in one
place at a time: by the pre-processor in the .S case and by perl in the .s case.
author: Andy Polyakov <appro@openssl.org> 2012-10-25 12:07:32 +0000
committer: Andy Polyakov <appro@openssl.org> 2012-10-25 12:07:32 +0000
commit: 1efd583085ffefb4d5d11e1e599e4123351df386 (patch)
tree: e6986fc9ceaf5e337c152eaae73a415e658bf64a
parent: 8ed11a815ee62472fc197d1a1a3dcdb6c0681342 (diff)
5 files changed, 70 insertions, 82 deletions
diff --git a/crypto/bn/asm/sparcv9-gf2m.pl b/crypto/bn/asm/sparcv9-gf2m.pl
index 04b9edde88..ab94cd917c 100644
--- a/crypto/bn/asm/sparcv9-gf2m.pl
+++ b/crypto/bn/asm/sparcv9-gf2m.pl
@@ -18,23 +18,8 @@
 # ~100-230% faster than gcc-generated code and ~35-90% faster than
 # the pure SPARCv9 code path.
 
-$bits=32;
-for (@ARGV)     { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64)  { $bias=2047; $frame=192; }
-else            { $bias=0;    $frame=112; }
-
 $locals=16*8;
 
-$code.=<<___;
-#include <sparc_arch.h>
-
-.section        ".text",#alloc,#execinstr
-___
-$code.=<<___ if ($bits==64);
-.register       %g2,#scratch
-.register       %g3,#scratch
-___
-
 $tab="%l0";
 
 @T=("%g2","%g3");
@@ -44,6 +29,13 @@ $tab="%l0";
 ($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
 
 $code.=<<___;
+#include <sparc_arch.h>
+
+#ifdef __arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
 #ifdef __PIC__
 SPARC_PIC_THUNK(%g1)
 #endif
@@ -74,7 +66,7 @@ bn_GF2m_mul_2x2:
 
 .align	16
 .Lsoftware:
-	save	%sp,-$frame-$locals,%sp
+	save	%sp,-STACK_FRAME-$locals,%sp
 
 	sllx	%i1,32,$a
 	mov	-1,$a12
@@ -83,7 +75,7 @@ bn_GF2m_mul_2x2:
 	srlx	$a12,1,$a48			! 0x7fff...
 	or	%i4,$b,$b
 	srlx	$a12,2,$a12			! 0x3fff...
-	add	%sp,$bias+$frame,$tab
+	add	%sp,STACK_BIAS+STACK_FRAME,$tab
 
 	sllx	$a,2,$a4
 	mov	$a,$a1
diff --git a/crypto/md5/asm/md5-sparcv9.pl b/crypto/md5/asm/md5-sparcv9.pl
index ef16666cc3..407da3c1b0 100644
--- a/crypto/md5/asm/md5-sparcv9.pl
+++ b/crypto/md5/asm/md5-sparcv9.pl
@@ -17,11 +17,6 @@
 # single-process result on 8-core processor, or ~11GBps per 2.85GHz
 # socket.
 
-$bits=32;
-for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64)	{ $bias=2047; $frame=192; }
-else		{ $bias=0;    $frame=112; }
-
 $output=shift;
 open STDOUT,">$output";
 
@@ -198,13 +193,14 @@ $code.=<<___;
 ___
 }
 
-$code.=<<___ if ($bits==64);
-.register	%g2,#scratch
-.register	%g3,#scratch
-___
 $code.=<<___;
 #include "sparc_arch.h"
 
+#ifdef __arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
 .section	".text",#alloc,#execinstr
 
 #ifdef __PIC__
@@ -246,7 +242,7 @@ md5_block_asm_data_order:
 
 	.word	0x81b02800		! MD5
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhw_loop
+	bne,pt	SIZE_T_CC, .Lhw_loop
 	nop
 
 .Lhwfinish:
@@ -287,7 +283,7 @@ md5_block_asm_data_order:
 
 	.word	0x81b02800		! MD5
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+	bne,pt	SIZE_T_CC, .Lhwunaligned_loop
 	for	%f26, %f26, %f10	! %f10=%f26
 
 	ba	.Lhwfinish
@@ -295,7 +291,7 @@ md5_block_asm_data_order:
 
 .align	16
 .Lsoftware:
-	save	%sp,-$frame,%sp
+	save	%sp,-STACK_FRAME,%sp
 
 	rd	%asi,$saved_asi
 	wr	%g0,0x88,%asi		! ASI_PRIMARY_LITTLE
@@ -355,7 +351,7 @@ $code.=<<___;
 	add	$t2,$C,$C
 	add	$CD,$D,$D
 	srl	$B,0,$B			! clruw	$B
-	bne	`$bits==64?"%xcc":"%icc"`,.Loop
+	bne	SIZE_T_CC,.Loop
 	srl	$D,0,$D			! clruw	$D
 
 	st	$A,[$ctx+0]		! write out ctx
diff --git a/crypto/sha/asm/sha1-sparcv9.pl b/crypto/sha/asm/sha1-sparcv9.pl
index 47a82d3267..b5efcde5c1 100644
--- a/crypto/sha/asm/sha1-sparcv9.pl
+++ b/crypto/sha/asm/sha1-sparcv9.pl
@@ -25,11 +25,6 @@
 # single-process result on 8-core processor, or ~9GBps per 2.85GHz
 # socket.
 
-$bits=32;
-for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64)	{ $bias=2047; $frame=192; }
-else		{ $bias=0;    $frame=112; }
-
 $output=shift;
 open STDOUT,">$output";
 
@@ -185,13 +180,14 @@ $code.=<<___;
 ___
 }
 
-$code.=<<___ if ($bits==64);
-.register	%g2,#scratch
-.register	%g3,#scratch
-___
 $code.=<<___;
 #include "sparc_arch.h"
 
+#ifdef __arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
 .section	".text",#alloc,#execinstr
 
 #ifdef __PIC__
@@ -231,7 +227,7 @@ sha1_block_data_order:
 
 	.word	0x81b02820		! SHA1
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhw_loop
+	bne,pt	SIZE_T_CC, .Lhw_loop
 	nop
 
 .Lhwfinish:
@@ -271,7 +267,7 @@ sha1_block_data_order:
 
 	.word	0x81b02820		! SHA1
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+	bne,pt	SIZE_T_CC, .Lhwunaligned_loop
 	for	%f26, %f26, %f10	! %f10=%f26
 
 	ba	.Lhwfinish
@@ -279,7 +275,7 @@ sha1_block_data_order:
 
 .align	16
 .Lsoftware:
-	save	%sp,-$frame,%sp
+	save	%sp,-STACK_FRAME,%sp
 	sllx	$len,6,$len
 	add	$inp,$len,$len
 
@@ -359,7 +355,7 @@ $code.=<<___;
 	add	$E,@X[4],$E
 	st	$E,[$ctx+16]
 
-	bne	`$bits==64?"%xcc":"%icc"`,.Lloop
+	bne	SIZE_T_CC,.Lloop
 	andn	$inp,7,$tmp0
 
 	ret
diff --git a/crypto/sha/asm/sha512-sparcv9.pl b/crypto/sha/asm/sha512-sparcv9.pl
index 4c749a5c8f..5a9c15d1d3 100644
--- a/crypto/sha/asm/sha512-sparcv9.pl
+++ b/crypto/sha/asm/sha512-sparcv9.pl
@@ -49,12 +49,6 @@
 # saturates at 11.5x single-process result on 8-core processor, or
 # ~11/16GBps per 2.85GHz socket.
 
-
-$bits=32;
-for (@ARGV)	{ $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
-if ($bits==64)	{ $bias=2047; $frame=192; }
-else		{ $bias=0;    $frame=112; }
-
 $output=shift;
 open STDOUT,">$output";
 
@@ -191,29 +185,29 @@ $code.=<<___ if ($i<15);
 	or	@pair[1],$tmp2,$tmp2
 	`"ld	[$inp+".eval(32+4+$i*8)."],@pair[1]"	if ($i<12)`
 	add	$h,$tmp2,$T1
-	$ST	$tmp2,[%sp+`$bias+$frame+$i*$SZ`]
+	$ST	$tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
 ___
 $code.=<<___ if ($i==12);
 	bnz,a,pn	%icc,.+8
 	ld	[$inp+128],%l0
 ___
 $code.=<<___ if ($i==15);
-	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
 	sllx	@pair[1],$tmp31,$tmp2	! Xload($i)
 	add	$tmp31,32,$tmp0
-	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
 	sllx	@pair[0],$tmp0,$tmp1
-	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
 	srlx	@pair[2],$tmp32,@pair[1]
 	or	$tmp1,$tmp2,$tmp2
-	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
 	or	@pair[1],$tmp2,$tmp2
-	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
 	add	$h,$tmp2,$T1
-	$ST	$tmp2,[%sp+`$bias+$frame+$i*$SZ`]
-	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
-	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
-	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+	$ST	$tmp2,[%sp+STACK_BIAS+STACK_FRAME+`$i*$SZ`]
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
 ___
 } if ($SZ==8);
 
@@ -349,9 +343,9 @@ $code.=<<___;
 	or	%l3,$tmp0,$tmp0
 
 	srlx	$tmp0,@sigma0[0],$T1
-	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+0`],%l2
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+0`],%l2
 	sllx	$tmp0,`64-@sigma0[2]`,$tmp1
-	ld	[%sp+`$bias+$frame+(($i+1+1)%16)*$SZ+4`],%l3
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+1)%16)*$SZ+4`],%l3
 	srlx	$tmp0,@sigma0[1],$tmp0
 	xor	$tmp1,$T1,$T1
 	sllx	$tmp1,`@sigma0[2]-@sigma0[1]`,$tmp1
@@ -363,9 +357,9 @@ $code.=<<___;
 	or	%l7,$tmp2,$tmp2
 
 	srlx	$tmp2,@sigma1[0],$tmp1
-	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+0`],%l6
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+0`],%l6
 	sllx	$tmp2,`64-@sigma1[2]`,$tmp0
-	ld	[%sp+`$bias+$frame+(($i+1+14)%16)*$SZ+4`],%l7
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+14)%16)*$SZ+4`],%l7
 	srlx	$tmp2,@sigma1[1],$tmp2
 	xor	$tmp0,$tmp1,$tmp1
 	sllx	$tmp0,`@sigma1[2]-@sigma1[1]`,$tmp0
@@ -374,29 +368,30 @@ $code.=<<___;
 	xor	$tmp0,$tmp1,$tmp1
 	sllx	%l4,32,$tmp0
 	xor	$tmp2,$tmp1,$tmp1	! sigma1(X[$i+14])
-	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+0`],%l4
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+0`],%l4
 	or	%l5,$tmp0,$tmp0
-	ld	[%sp+`$bias+$frame+(($i+1+9)%16)*$SZ+4`],%l5
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+9)%16)*$SZ+4`],%l5
 
 	sllx	%l0,32,$tmp2
 	add	$tmp1,$T1,$T1
-	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+0`],%l0
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+0`],%l0
 	or	%l1,$tmp2,$tmp2
 	add	$tmp0,$T1,$T1		! +=X[$i+9]
-	ld	[%sp+`$bias+$frame+(($i+1+0)%16)*$SZ+4`],%l1
+	ld	[%sp+STACK_BIAS+STACK_FRAME+`(($i+1+0)%16)*$SZ+4`],%l1
 	add	$tmp2,$T1,$T1		! +=X[$i]
-	$ST	$T1,[%sp+`$bias+$frame+($i%16)*$SZ`]
+	$ST	$T1,[%sp+STACK_BIAS+STACK_FRAME+`($i%16)*$SZ`]
 ___
     &BODY_00_15(@_);
 } if ($SZ==8);
 
-$code.=<<___ if ($bits==64);
-.register	%g2,#scratch
-.register	%g3,#scratch
-___
 $code.=<<___;
 #include "sparc_arch.h"
 
+#ifdef __arch64__
+.register	%g2,#scratch
+.register	%g3,#scratch
+#endif
+
 .section	".text",#alloc,#execinstr
 
 .align	64
@@ -519,7 +514,7 @@ $code.=<<___ if ($SZ==8); 		# SHA512
 
 	.word	0x81b02860		! SHA512
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwaligned_loop
+	bne,pt	SIZE_T_CC, .Lhwaligned_loop
 	nop
 
 .Lhwfinish:
@@ -579,7 +574,7 @@ $code.=<<___ if ($SZ==8); 		# SHA512
 
 	.word	0x81b02860		! SHA512
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+	bne,pt	SIZE_T_CC, .Lhwunaligned_loop
 	for	%f50, %f50, %f18	! %f18=%f50
 
 	ba	.Lhwfinish
@@ -612,7 +607,7 @@ $code.=<<___ if ($SZ==4); 		# SHA256
 
 	.word	0x81b02840		! SHA256
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwloop
+	bne,pt	SIZE_T_CC, .Lhwloop
 	nop
 
 .Lhwfinish:
@@ -655,7 +650,7 @@ $code.=<<___ if ($SZ==4); 		# SHA256
 
 	.word	0x81b02840		! SHA256
 
-	bne,pt	`$bits==64?"%xcc":"%icc"`, .Lhwunaligned_loop
+	bne,pt	SIZE_T_CC, .Lhwunaligned_loop
 	for	%f26, %f26, %f10	! %f10=%f26
 
 	ba	.Lhwfinish
@@ -664,7 +659,7 @@ ___
 $code.=<<___;
 .align	16
 .Lsoftware:
-	save	%sp,`-$frame-$locals`,%sp
+	save	%sp,-STACK_FRAME-$locals,%sp
 	and	$inp,`$align-1`,$tmp31
 	sllx	$len,`log(16*$SZ)/log(2)`,$len
 	andn	$inp,`$align-1`,$inp
@@ -783,7 +778,7 @@ ___
 $code.=<<___;
 	add	$inp,`16*$SZ`,$inp		! advance inp
 	cmp	$inp,$len
-	bne	`$bits==64?"%xcc":"%icc"`,.Lloop
+	bne	SIZE_T_CC,.Lloop
 	sub	$Ktbl,`($rounds-16)*$SZ`,$Ktbl	! rewind Ktbl
 
 	ret
diff --git a/crypto/sparc_arch.h b/crypto/sparc_arch.h
index 1a8fca95b8..1afef4b8a4 100644
--- a/crypto/sparc_arch.h
+++ b/crypto/sparc_arch.h
@@ -32,6 +32,10 @@
 # define __PIC__
 #endif
 
+#if defined(__SUNPRO_C) && defined(__sparcv9) && !defined(__arch64__)
+# define __arch64__
+#endif
+
 #define SPARC_PIC_THUNK(reg)	\
 	.align	32;		\
 .Lpic_thunk:			\
@@ -53,18 +57,23 @@
 	add	%o7, reg, reg
 #endif
 
-#if	(defined(__GNUC__) && defined(__arch64__)) || \
-	(defined(__SUNPRO_C) && defined(__sparcv9))
+#if defined(__arch64__)
 
 # define SPARC_LOAD_ADDRESS(SYM, reg)	\
 	setx	SYM, %o7, reg;
-# define LDPTR	ldx
+# define LDPTR		ldx
+# define SIZE_T_CC	%xcc
+# define STACK_FRAME	192
+# define STACK_BIAS	2047
 
 #else
 
 # define SPARC_LOAD_ADDRESS(SYM, reg)	\
 	set	SYM, reg;
-# define LDPTR	ld
+# define LDPTR		ld
+# define SIZE_T_CC	%icc
+# define STACK_FRAME	112
+# define STACK_BIAS	0
 # define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
 
 #endif
author	Andy Polyakov <appro@openssl.org>	2012-10-25 12:07:32 +0000
committer	Andy Polyakov <appro@openssl.org>	2012-10-25 12:07:32 +0000
commit	1efd583085ffefb4d5d11e1e599e4123351df386 (patch)
tree	e6986fc9ceaf5e337c152eaae73a415e658bf64a
parent	8ed11a815ee62472fc197d1a1a3dcdb6c0681342 (diff)