From a00e414faf76db8372f4e5fe04010e47e8ff90d4 Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Fri, 16 Dec 2005 17:39:57 +0000
Subject: Unify sparcv9 assembler naming and build rules among 32- and 64-bit
 builds. Engage run-time switch between bn_mul_mont_fpu and bn_mul_mont_int.

---
 crypto/bn/Makefile             |  6 ++++--
 crypto/bn/asm/sparcv8plus.S    | 15 +++++++++++++--
 crypto/bn/asm/sparcv9-mont.pl  |  5 +++--
 crypto/bn/asm/sparcv9a-mont.pl |  6 +++---
 crypto/cryptlib.c              |  2 +-
 crypto/md5/Makefile            | 14 --------------
 crypto/sparccpuid.S            |  8 ++++----
 crypto/sparcv9cap.c            | 26 +++++++++++++-------------
 8 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'crypto')

diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile
index 9af62f6187..c356b395b4 100644
--- a/crypto/bn/Makefile
+++ b/crypto/bn/Makefile
@@ -86,10 +86,12 @@ mo86-out.s: asm/x86-mont.pl ../perlasm/x86asm.pl
 
 sparcv8.o:	asm/sparcv8.S
 	$(CC) $(CFLAGS) -c asm/sparcv8.S
-sparcv8plus.o:	asm/sparcv8plus.S
-	$(CC) $(CFLAGS) -c asm/sparcv8plus.S
+bn-sparcv9.o:	asm/sparcv8plus.S
+	$(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S
 sparcv9a-mont.s:	asm/sparcv9a-mont.pl
 	$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
+sparcv9-mont.s:		asm/sparcv9-mont.pl
+	$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
 
 bn-mips3.o:	asm/mips3.s
 	@if [ "$(CC)" = "gcc" ]; then \
diff --git a/crypto/bn/asm/sparcv8plus.S b/crypto/bn/asm/sparcv8plus.S
index 8c56e2e7e7..63de1860f2 100644
--- a/crypto/bn/asm/sparcv8plus.S
+++ b/crypto/bn/asm/sparcv8plus.S
@@ -144,6 +144,19 @@
  *	    }
  */
 
+#if defined(__SUNPRO_C) && defined(__sparcv9)
+  /* They've said -xarch=v9 at command line */
+  .register	%g2,#scratch
+  .register	%g3,#scratch
+# define	FRAME_SIZE	-192
+#elif defined(__GNUC__) && defined(__arch64__)
+  /* They've said -m64 at command line */
+  .register	%g2,#scratch
+  .register	%g3,#scratch
+# define	FRAME_SIZE	-192
+#else 
+# define	FRAME_SIZE	-96
+#endif 
 /*
  * GNU assembler can't stand stuw:-(
  */
@@ -619,8 +632,6 @@ bn_sub_words:
  *							Andy.
  */
 
-#define FRAME_SIZE	-96
-
 /*
  * Here is register usage map for *all* routines below.
  */
diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl
index 0339bfe7f3..2e12eeb578 100644
--- a/crypto/bn/asm/sparcv9-mont.pl
+++ b/crypto/bn/asm/sparcv9-mont.pl
@@ -72,7 +72,7 @@ $apj="%l5";
 $npj="%l6";
 $tpj="%l7";
 
-$fname="bn_mul_mont";
+$fname="bn_mul_mont_int";
 
 $code=<<___;
 .section	".text",#alloc,#execinstr
@@ -298,7 +298,8 @@ $fname:
 ___
 
 ########
-######## bn_sqr_mont gives up to 20% improvement over above code
+######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
+######## code without the following dedicated squaring procedure.
 ########
 $sbit="%i2";		# re-use $bp!
 
diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl
index 81d7ef608f..e1a4546387 100755
--- a/crypto/bn/asm/sparcv9a-mont.pl
+++ b/crypto/bn/asm/sparcv9a-mont.pl
@@ -32,8 +32,8 @@
 # which is perfectly usable in this context... In other words, as far
 # as HAL/Fujitsu SPARC64 family goes, talk to the author:-)
 
-# In 32-bit context the implementation implies following additional
-# limitations on input arguments:
+# The implementation imposes the following "non-natural" limitations on
+# input arguments:
 # - num may not be less than 4;
 # - num has to be even;
 # - ap, bp, rp, np has to be 64-bit aligned [which is not a problem
@@ -47,7 +47,7 @@
 #   noticeable(!) improvement);
 # - dedicated squaring procedure[?];
 
-$fname="bn_mul_mont";
+$fname="bn_mul_mont_fpu";
 $bits=32;
 for (@ARGV) {
 	$bits=64    if (/\-m64/        || /\-xarch\=v9/);
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index 315559c71c..6464c53705 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -572,7 +572,7 @@ void OPENSSL_cpuid_setup(void)
 unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; }
 #endif
 int OPENSSL_NONPIC_relocated = 0;
-#if !defined(OPENSSL_CPUID_SETUP)
+#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ)
 void OPENSSL_cpuid_setup(void) {}
 #endif
 
diff --git a/crypto/md5/Makefile b/crypto/md5/Makefile
index 9ae82f26a0..495039770d 100644
--- a/crypto/md5/Makefile
+++ b/crypto/md5/Makefile
@@ -52,20 +52,6 @@ mx86-cof.s: asm/md5-586.pl ../perlasm/x86asm.pl
 mx86-out.s: asm/md5-586.pl ../perlasm/x86asm.pl
 	(cd asm; $(PERL) md5-586.pl a.out $(CFLAGS) > ../$@)
 
-md5-sparcv8plus.o: asm/md5-sparcv9.S
-	$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \
-		-o md5-sparcv8plus.o asm/md5-sparcv9.S
-
-# Old GNU assembler doesn't understand V9 instructions, so we
-# hire /usr/ccs/bin/as to do the job. Note that option is called
-# *-gcc27, but even gcc 2>=8 users may experience similar problem
-# if they didn't bother to upgrade GNU assembler. Such users should
-# not choose this option, but be adviced to *remove* GNU assembler
-# or upgrade it.
-md5-sparcv8plus-gcc27.o: asm/md5-sparcv9.S
-	$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -E asm/md5-sparcv9.S | \
-		/usr/ccs/bin/as -xarch=v8plus - -o md5-sparcv8plus-gcc27.o
-
 md5-sparcv9.o: asm/md5-sparcv9.S
 	$(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \
 		-o md5-sparcv9.o asm/md5-sparcv9.S
diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S
index ac57472dd6..52308abca6 100644
--- a/crypto/sparccpuid.S
+++ b/crypto/sparccpuid.S
@@ -215,9 +215,9 @@ OPENSSL_atomic_add:
 	sra	%o0,%g0,%o0	! we return signed int, remember?
 .size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
 
-.global	OPENSSL_rdtick
+.global	_sparcv9_rdtick
 .align	32
-OPENSSL_rdtick:
+_sparcv9_rdtick:
 	subcc	%g0,1,%o0
 	.word	0x91408000	!rd	%ccr,%o0
 	cmp	%o0,0x99
@@ -229,8 +229,8 @@ OPENSSL_rdtick:
 .notick:
 	retl
 	xor	%o1,%o1,%o1
-.type	OPENSSL_rdtick,#function
-.size	OPENSSL_rdtick,.-OPENSSL_rdtick
+.type	_sparcv9_rdtick,#function
+.size	_sparcv9_rdtick,.-_sparcv9_rdtick
 
 .section	".init",#alloc,#execinstr
 	call	OPENSSL_cpuid_setup
diff --git a/crypto/sparcv9cap.c b/crypto/sparcv9cap.c
index 7012f78604..db6f703052 100644
--- a/crypto/sparcv9cap.c
+++ b/crypto/sparcv9cap.c
@@ -16,7 +16,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
 	int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 	int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
 
-	if (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1) ==
+	if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) ==
 		(SPARCV9_PREFER_FPU|SPARCV9_VIS1))
 		return bn_mul_mont_fpu(rp,ap,bp,np,n0,num);
 	else
@@ -25,7 +25,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
 
 unsigned long OPENSSL_rdtsc(void)
 	{
-	unsigned long OPENSSL_rdtick(void);
+	unsigned long _sparcv9_rdtick(void);
 
 	if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED)
 #if defined(__sun) && defined(__SVR4)
@@ -34,7 +34,7 @@ unsigned long OPENSSL_rdtsc(void)
 		return 0;
 #endif
 	else
-		return OPENSSL_rdtick();
+		return _sparcv9_rdtick();
 	}
 
 #if defined(__sun) && defined(__SVR4)
@@ -79,14 +79,18 @@ void OPENSSL_cpuid_setup(void)
 	{
 	void *h;
 	char *e;
+	static int trigger=0;
 
-	if (e=getenv("OPENSSL_sparcv9cap"))
+	if (trigger) return;
+	trigger=1;
+
+	if ((e=getenv("OPENSSL_sparcv9cap")))
 		{
 		OPENSSL_sparcv9cap_P=strtoul(e,NULL,0);
 		return;
 		}
 
-	if (h = dlopen("libdevinfo.so.1",RTLD_LAZY)) do
+	if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do
 		{
 		di_init_t	di_init;
 		di_fini_t	di_fini;
@@ -110,24 +114,20 @@ void OPENSSL_cpuid_setup(void)
 	if (h) dlclose(h);
 	}
 
-#elif defined(__linux)
+#else
 
-void OPENSSL_cpuid_setup(void)
+void OPENSSL_cpucap_setup(void)
 	{
 	char *e;
  
-	if (e=getenv("OPENSSL_sparcv9cap"))
+	if ((e=getenv("OPENSSL_sparcv9cap")))
 		{
 		OPENSSL_sparcv9cap_P=strtoul(env,NULL,0);
 		return;
 		}
 
-	/* Linux apparently supports UltraSPARC-I/II/III only */
+	/* For now we assume that all remaining platforms support UltraSPARC-I* only */
 	OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1;
 	}
 
-#else
-
-void OPENSSL_cpuid_setup(void) {}
-
 #endif
-- 
cgit v1.2.3