From a00e414faf76db8372f4e5fe04010e47e8ff90d4 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 16 Dec 2005 17:39:57 +0000 Subject: Unify sparcv9 assembler naming and build rules among 32- and 64-bit builds. Engage run-time switch between bn_mul_mont_fpu and bn_mul_mont_int. --- crypto/bn/Makefile | 6 ++++-- crypto/bn/asm/sparcv8plus.S | 15 +++++++++++++-- crypto/bn/asm/sparcv9-mont.pl | 5 +++-- crypto/bn/asm/sparcv9a-mont.pl | 6 +++--- crypto/cryptlib.c | 2 +- crypto/md5/Makefile | 14 -------------- crypto/sparccpuid.S | 8 ++++---- crypto/sparcv9cap.c | 26 +++++++++++++------------- 8 files changed, 41 insertions(+), 41 deletions(-) (limited to 'crypto') diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile index 9af62f6187..c356b395b4 100644 --- a/crypto/bn/Makefile +++ b/crypto/bn/Makefile @@ -86,10 +86,12 @@ mo86-out.s: asm/x86-mont.pl ../perlasm/x86asm.pl sparcv8.o: asm/sparcv8.S $(CC) $(CFLAGS) -c asm/sparcv8.S -sparcv8plus.o: asm/sparcv8plus.S - $(CC) $(CFLAGS) -c asm/sparcv8plus.S +bn-sparcv9.o: asm/sparcv8plus.S + $(CC) $(CFLAGS) -c -o $@ asm/sparcv8plus.S sparcv9a-mont.s: asm/sparcv9a-mont.pl $(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@ +sparcv9-mont.s: asm/sparcv9-mont.pl + $(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@ bn-mips3.o: asm/mips3.s @if [ "$(CC)" = "gcc" ]; then \ diff --git a/crypto/bn/asm/sparcv8plus.S b/crypto/bn/asm/sparcv8plus.S index 8c56e2e7e7..63de1860f2 100644 --- a/crypto/bn/asm/sparcv8plus.S +++ b/crypto/bn/asm/sparcv8plus.S @@ -144,6 +144,19 @@ * } */ +#if defined(__SUNPRO_C) && defined(__sparcv9) + /* They've said -xarch=v9 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#elif defined(__GNUC__) && defined(__arch64__) + /* They've said -m64 at command line */ + .register %g2,#scratch + .register %g3,#scratch +# define FRAME_SIZE -192 +#else +# define FRAME_SIZE -96 +#endif /* * GNU assembler can't stand stuw:-( */ @@ -619,8 +632,6 @@ bn_sub_words: * Andy. */ -#define FRAME_SIZE -96 - /* * Here is register usage map for *all* routines below. */ diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl index 0339bfe7f3..2e12eeb578 100644 --- a/crypto/bn/asm/sparcv9-mont.pl +++ b/crypto/bn/asm/sparcv9-mont.pl @@ -72,7 +72,7 @@ $apj="%l5"; $npj="%l6"; $tpj="%l7"; -$fname="bn_mul_mont"; +$fname="bn_mul_mont_int"; $code=<<___; .section ".text",#alloc,#execinstr @@ -298,7 +298,8 @@ $fname: ___ ######## -######## bn_sqr_mont gives up to 20% improvement over above code +######## .Lbn_sqr_mont gives up to 20% *overall* improvement over +######## code without following dedicated squaring procedure. ######## $sbit="%i2"; # re-use $bp! diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl index 81d7ef608f..e1a4546387 100755 --- a/crypto/bn/asm/sparcv9a-mont.pl +++ b/crypto/bn/asm/sparcv9a-mont.pl @@ -32,8 +32,8 @@ # which is perfectly usable in this context... In other words, as far # as HAL/Fujitsu SPARC64 family goes, talk to the author:-) -# In 32-bit context the implementation implies following additional -# limitations on input arguments: +# The implementation implies following "non-natural" limitations on +# input arguments: # - num may not be less than 4; # - num has to be even; # - ap, bp, rp, np has to be 64-bit aligned [which is not a problem @@ -47,7 +47,7 @@ # noticeable(!) improvement); # - dedicated squaring procedure[?]; -$fname="bn_mul_mont"; +$fname="bn_mul_mont_fpu"; $bits=32; for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c index 315559c71c..6464c53705 100644 --- a/crypto/cryptlib.c +++ b/crypto/cryptlib.c @@ -572,7 +572,7 @@ void OPENSSL_cpuid_setup(void) unsigned long *OPENSSL_ia32cap_loc(void) { return NULL; } #endif int OPENSSL_NONPIC_relocated = 0; -#if !defined(OPENSSL_CPUID_SETUP) +#if !defined(OPENSSL_CPUID_SETUP) && !defined(OPENSSL_CPUID_OBJ) void OPENSSL_cpuid_setup(void) {} #endif diff --git a/crypto/md5/Makefile b/crypto/md5/Makefile index 9ae82f26a0..495039770d 100644 --- a/crypto/md5/Makefile +++ b/crypto/md5/Makefile @@ -52,20 +52,6 @@ mx86-cof.s: asm/md5-586.pl ../perlasm/x86asm.pl mx86-out.s: asm/md5-586.pl ../perlasm/x86asm.pl (cd asm; $(PERL) md5-586.pl a.out $(CFLAGS) > ../$@) -md5-sparcv8plus.o: asm/md5-sparcv9.S - $(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \ - -o md5-sparcv8plus.o asm/md5-sparcv9.S - -# Old GNU assembler doesn't understand V9 instructions, so we -# hire /usr/ccs/bin/as to do the job. Note that option is called -# *-gcc27, but even gcc 2>=8 users may experience similar problem -# if they didn't bother to upgrade GNU assembler. Such users should -# not choose this option, but be adviced to *remove* GNU assembler -# or upgrade it. -md5-sparcv8plus-gcc27.o: asm/md5-sparcv9.S - $(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -E asm/md5-sparcv9.S | \ - /usr/ccs/bin/as -xarch=v8plus - -o md5-sparcv8plus-gcc27.o - md5-sparcv9.o: asm/md5-sparcv9.S $(CC) $(ASFLAGS) -DMD5_BLOCK_DATA_ORDER -c \ -o md5-sparcv9.o asm/md5-sparcv9.S diff --git a/crypto/sparccpuid.S b/crypto/sparccpuid.S index ac57472dd6..52308abca6 100644 --- a/crypto/sparccpuid.S +++ b/crypto/sparccpuid.S @@ -215,9 +215,9 @@ OPENSSL_atomic_add: sra %o0,%g0,%o0 ! we return signed int, remember? .size OPENSSL_atomic_add,.-OPENSSL_atomic_add -.global OPENSSL_rdtick +.global _sparcv9_rdtick .align 32 -OPENSSL_rdtick: +_sparcv9_rdtick: subcc %g0,1,%o0 .word 0x91408000 !rd %ccr,%o0 cmp %o0,0x99 @@ -229,8 +229,8 @@ OPENSSL_rdtick: .notick: retl xor %o1,%o1,%o1 -.type OPENSSL_rdtick,#function -.size OPENSSL_rdtick,.-OPENSSL_rdtick +.type _sparcv9_rdtick,#function +.size _sparcv9_rdtick,.-_sparcv9_rdtick .section ".init",#alloc,#execinstr call OPENSSL_cpuid_setup diff --git a/crypto/sparcv9cap.c b/crypto/sparcv9cap.c index 7012f78604..db6f703052 100644 --- a/crypto/sparcv9cap.c +++ b/crypto/sparcv9cap.c @@ -16,7 +16,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int bn_mul_mont_fpu(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); int bn_mul_mont_int(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); - if (OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1) == + if ((OPENSSL_sparcv9cap_P&(SPARCV9_PREFER_FPU|SPARCV9_VIS1)) == (SPARCV9_PREFER_FPU|SPARCV9_VIS1)) return bn_mul_mont_fpu(rp,ap,bp,np,n0,num); else @@ -25,7 +25,7 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U unsigned long OPENSSL_rdtsc(void) { - unsigned long OPENSSL_rdtick(void); + unsigned long _sparcv9_rdtick(void); if (OPENSSL_sparcv9cap_P&SPARCV9_TICK_PRIVILEGED) #if defined(__sun) && defined(__SVR4) @@ -34,7 +34,7 @@ unsigned long OPENSSL_rdtsc(void) return 0; #endif else - return OPENSSL_rdtick(); + return _sparcv9_rdtick(); } #if defined(__sun) && defined(__SVR4) @@ -79,14 +79,18 @@ void OPENSSL_cpuid_setup(void) { void *h; char *e; + static int trigger=0; - if (e=getenv("OPENSSL_sparcv9cap")) + if (trigger) return; + trigger=1; + + if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(e,NULL,0); return; } - if (h = dlopen("libdevinfo.so.1",RTLD_LAZY)) do + if ((h = dlopen("libdevinfo.so.1",RTLD_LAZY))) do { di_init_t di_init; di_fini_t di_fini; @@ -110,24 +114,20 @@ void OPENSSL_cpuid_setup(void) if (h) dlclose(h); } -#elif defined(__linux) +#else -void OPENSSL_cpuid_setup(void) +void OPENSSL_cpucap_setup(void) { char *e; - if (e=getenv("OPENSSL_sparcv9cap")) + if ((e=getenv("OPENSSL_sparcv9cap"))) { OPENSSL_sparcv9cap_P=strtoul(env,NULL,0); return; } - /* Linux apparently supports UltraSPARC-I/II/III only */ + /* For now we assume that the rest supports UltraSPARC-I* only */ OPENSSL_sparcv9cap_P |= SPARCV9_PREFER_FPU|SPARCV9_VIS1; } -#else - -void OPENSSL_cpuid_setup(void) {} - #endif -- cgit v1.2.3