summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--crypto/perlasm/x86unix.pl41
-rw-r--r--crypto/x86cpuid.pl32
2 files changed, 32 insertions, 41 deletions
diff --git a/crypto/perlasm/x86unix.pl b/crypto/perlasm/x86unix.pl
index e71050b6bc..53507b6b84 100644
--- a/crypto/perlasm/x86unix.pl
+++ b/crypto/perlasm/x86unix.pl
@@ -541,50 +541,13 @@ sub main'set_label
sub main'file_end
{
# try to detect if SSE2 or MMX extensions were used on ELF platform...
- if ($main'elf && grep {/%[x]*mm[0-7]/i} @out) {
+ if ($main'elf && grep {/\b%[x]*mm[0-7]\b|OPENSSL_ia32cap_P\b/i} @out) {
local($tmp);
push (@out,"\n.section\t.bss\n");
push (@out,".comm\t${under}OPENSSL_ia32cap_P,4,4\n");
- push (@out,".section\t.init\n");
- # One can argue that it's wasteful to craft every
- # SSE/MMX module with this snippet... Well, it's 72
- # bytes long and for the moment we have two modules.
- # Let's argue when we have 7 modules or so...
- #
- # $1<<10 sets a reserved bit to signal that variable
- # was initialized already...
- &main'picmeup("edx","OPENSSL_ia32cap_P");
- $tmp=<<___;
- cmpl \$0,(%edx)
- jne 1f
- movl \$1<<10,(%edx)
- pushf
- popl %eax
- movl %eax,%ecx
- xorl \$1<<21,%eax
- pushl %eax
- popf
- pushf
- popl %eax
- xorl %ecx,%eax
- btl \$21,%eax
- jnc 1f
- pushl %edi
- pushl %ebx
- movl %edx,%edi
- movl \$1,%eax
- .byte 0x0f,0xa2
- orl \$1<<10,%edx
- movl %edx,0(%edi)
- popl %ebx
- popl %edi
- jmp 1f
- .align $align
- 1:
-___
- push (@out,$tmp);
+ return;
}
if ($const ne "")
diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl
index c53c9bc998..4408ef2936 100644
--- a/crypto/x86cpuid.pl
+++ b/crypto/x86cpuid.pl
@@ -19,13 +19,41 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&pop ("eax");
&xor ("ecx","eax");
&bt ("ecx",21);
- &jnc (&label("nocpuid"));
+ &jnc (&label("done"));
+ &xor ("eax","eax");
+ &cpuid ();
+ &xor ("eax","eax");
+ &cmp ("ebx",0x756e6547); # "Genu"
+ &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
+ &mov ("ebp","eax");
+ &cmp ("edx",0x49656e69); # "ineI"
+ &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
+ &or ("ebp","eax");
+ &cmp ("ecx",0x6c65746e); # "ntel"
+ &data_byte(0x0f,0x95,0xc0); #&setne (&LB("eax"));
+ &or ("ebp","eax");
&mov ("eax",1);
&cpuid ();
-&set_label("nocpuid");
+ &cmp ("ebp",0);
+ &jne (&label("notP4"));
+ &and ("eax",15<<8); # familiy ID
+ &cmp ("eax",15<<8); # P4?
+ &jne (&label("notP4"));
+ &or ("edx",1<<20); # use reserved bit to engage RC4_CHAR
+&set_label("notP4");
+ &bt ("edx",28); # test hyper-threading bit
+ &jnc (&label("done"));
+ &shr ("ebx",16);
+ &and ("ebx",0xff);
+ &cmp ("ebx",1); # see if cache is shared(*)
+ &ja (&label("done"));
+ &and ("edx",0xefffffff); # clear hyper-threading bit if not
+&set_label("done");
&mov ("eax","edx");
&mov ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
+# (*) on Core2 this value is set to 2 denoting the fact that L2
+# cache is shared between cores.
&external_label("OPENSSL_ia32cap_P");