From 761393bba79700d48dc1b4b67b928488c9f99397 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 14 May 2009 18:17:26 +0000 Subject: x86[_64]cpuid.pl: further refine shared cache detection. --- crypto/x86cpuid.pl | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'crypto/x86cpuid.pl') diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index 36c79ca01e..e5dcc58124 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &jnc (&label("done")); &xor ("eax","eax"); &cpuid (); + &mov ("edi","eax"); # max value for standard query level + &xor ("eax","eax"); &cmp ("ebx",0x756e6547); # "Genu" &setne (&LB("eax")); @@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &cmp ("ecx",0x6c65746e); # "ntel" &setne (&LB("eax")); &or ("ebp","eax"); # 0 indicates Intel CPU - &mov ("esi",1); # "number of [AMD] cores" &jz (&label("intel")); &cmp ("ebx",0x68747541); # "Auth" @@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &or ("esi","eax"); # 0 indicates AMD CPU &jnz (&label("intel")); + # AMD specific &mov ("eax",0x80000000); &cpuid (); &cmp ("eax",0x80000008); - &mov ("esi",1); # "number of [AMD] cores" &jb (&label("intel")); &mov ("eax",0x80000008); @@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &movz ("esi",&LB("ecx")); # number of cores - 1 &inc ("esi"); # number of cores + &mov ("eax",1); + &cpuid (); + &bt ("edx",28); + &jnc (&label("done")); + &shr ("ebx",16); + &and ("ebx",0xff); + &cmp ("ebx","esi"); + &ja (&label("done")); + &and ("edx",0xefffffff); # clear hyper-threading bit + &jmp (&label("done")); + &set_label("intel"); + &cmp ("edi",4); + &mov ("edi",-1); + &jb (&label("nocacheinfo")); + + &mov ("eax",4); + &mov ("ecx",0); # query L1D + &cpuid (); + &mov ("edi","eax"); + &shr ("edi",14); + &and ("edi",0xfff); # number of cores -1 per L1D + +&set_label("nocacheinfo"); &mov ("eax",1); &cpuid (); &cmp ("ebp",0); @@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &set_label("notP4"); &bt ("edx",28); # test hyper-threading bit &jnc (&label("done")); + &and ("edx",0xefffffff); + &cmp ("edi",0); + &je (&label("done")); + + &or ("edx",0x10000000); &shr ("ebx",16); - &and ("ebx",0xff); - &cmp ("ebx","esi"); # see if cache is shared(*) + &cmp (&LB("ebx"),1); &ja (&label("done")); &and ("edx",0xefffffff); # clear hyper-threading bit if not &set_label("done"); &mov ("eax","edx"); &mov ("edx","ecx"); &function_end("OPENSSL_ia32_cpuid"); -# (*) on Core2 this value is set to 2 denoting the fact that L2 -# cache is shared between cores. &external_label("OPENSSL_ia32cap_P"); -- cgit v1.2.3