diff options
Diffstat (limited to 'hwloc-1.2.1/src/topology-x86.c')
-rw-r--r-- | hwloc-1.2.1/src/topology-x86.c | 535 |
1 files changed, 535 insertions, 0 deletions
diff --git a/hwloc-1.2.1/src/topology-x86.c b/hwloc-1.2.1/src/topology-x86.c new file mode 100644 index 00000000..e1e7475a --- /dev/null +++ b/hwloc-1.2.1/src/topology-x86.c @@ -0,0 +1,535 @@ +/* + * Copyright © 2010 INRIA. All rights reserved. + * Copyright © 2010-2011 Université Bordeaux 1 + * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. + * See COPYING in top-level directory. + * + * + * This backend is only used when the operating system does not export + * the necessary hardware topology information to user-space applications. + * Currently, only the FreeBSD backend relies on this x86 backend. + * + * Other backends such as Linux have their own way to retrieve various + * pieces of hardware topology information from the operating system + * on various architectures, without having to use this x86-specific code. + */ + +#include <private/autogen/config.h> +#include <hwloc.h> +#include <private/private.h> +#include <private/debug.h> +#include <private/cpuid.h> +#include <private/misc.h> + +struct cacheinfo { + unsigned type; + unsigned level; + unsigned nbthreads_sharing; + + unsigned linesize; + unsigned linepart; + unsigned ways; + unsigned sets; + unsigned size; +}; + +struct procinfo { + unsigned present; + unsigned apicid; + unsigned max_log_proc; + unsigned max_nbcores; + unsigned max_nbthreads; + unsigned socketid; + unsigned logprocid; + unsigned threadid; + unsigned coreid; + unsigned *otherids; + unsigned levels; + unsigned numcaches; + struct cacheinfo *cache; +}; + +enum cpuid_type { + intel, + amd, + unknown +}; + +static void fill_amd_cache(struct procinfo *infos, unsigned level, unsigned cpuid) +{ + struct cacheinfo *cache; + unsigned cachenum; + unsigned size = 0; + + if (level == 1) + size = ((cpuid >> 24)) << 10; + else if (level == 2) + size = ((cpuid >> 16)) << 10; + else if (level == 3) + size = ((cpuid >> 18)) << 19; + if (!size) + return; + + cachenum = infos->numcaches++; + infos->cache = realloc(infos->cache, infos->numcaches*sizeof(*infos->cache)); + cache = &infos->cache[cachenum]; + + cache->type = 1; + cache->level = level; + if (level <= 2) + cache->nbthreads_sharing = 1; + else + cache->nbthreads_sharing = infos->max_log_proc; + cache->linesize = cpuid & 0xff; + cache->linepart = 0; + if (level == 1) + cache->ways = (cpuid >> 16) & 0xff; + else { + static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0 }; + unsigned ways = (cpuid >> 12) & 0xf; + cache->ways = ways_tab[ways]; + } + cache->size = size; + cache->sets = 0; + + hwloc_debug("cache L%u t%u linesize %u ways %u size %uKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10); +} + +/* Fetch information from the processor itself thanks to cpuid and store it in + * infos for summarize to analyze them globally */ +static void look_proc(struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, enum cpuid_type cpuid_type) +{ + unsigned eax, ebx, ecx = 0, edx; + unsigned cachenum; + struct cacheinfo *cache; + + infos->present = 1; + + eax = 0x01; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + infos->apicid = ebx >> 24; + if (edx & (1 << 28)) + infos->max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1); + else + infos->max_log_proc = 1; + hwloc_debug("APIC ID 0x%02x max_log_proc %u\n", infos->apicid, infos->max_log_proc); + infos->socketid = infos->apicid / infos->max_log_proc; + infos->logprocid = infos->apicid % infos->max_log_proc; + infos->coreid = (unsigned) -1; + infos->threadid = (unsigned) -1; + hwloc_debug("phys %u thread %u\n", infos->socketid, infos->logprocid); + + /* Intel doesn't actually provide 0x80000008 information */ + if (cpuid_type != intel && highest_ext_cpuid >= 0x80000008) { + unsigned coreidsize; + eax = 0x80000008; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + coreidsize = (ecx >> 12) & 0xf; + hwloc_debug("core ID size: %u\n", coreidsize); + if (!coreidsize) { + infos->max_nbcores = (ecx & 0xff) + 1; + } else + infos->max_nbcores = 1 << coreidsize; + hwloc_debug("Thus max # of cores: %u\n", infos->max_nbcores); + /* Still no multithreaded AMD */ + infos->max_nbthreads = 1 ; + hwloc_debug("and max # of threads: %u\n", infos->max_nbthreads); + infos->threadid = infos->logprocid % infos->max_nbthreads; + infos->coreid = infos->logprocid / infos->max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); + } + + infos->numcaches = 0; + infos->cache = NULL; + + /* Intel doesn't actually provide 0x80000005 information */ + if (cpuid_type != intel && highest_ext_cpuid >= 0x80000005) { + eax = 0x80000005; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + fill_amd_cache(infos, 1, ecx); + } + + /* Intel doesn't actually provide 0x80000006 information */ + if (cpuid_type != intel && highest_ext_cpuid >= 0x80000006) { + eax = 0x80000006; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + fill_amd_cache(infos, 2, ecx); + fill_amd_cache(infos, 3, edx); + } + + /* AMD doesn't actually provide 0x04 information */ + if (cpuid_type != amd && highest_cpuid >= 0x04) { + cachenum = 0; + for (cachenum = 0; ; cachenum++) { + unsigned type; + eax = 0x04; + ecx = cachenum; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + + type = eax & 0x1f; + + hwloc_debug("cache %u type %u\n", cachenum, type); + + if (type == 0) + break; + if (type == 2) + /* Instruction cache */ + continue; + infos->numcaches++; + } + + cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); + + for (cachenum = 0; ; cachenum++) { + unsigned linesize, linepart, ways, sets; + unsigned type; + eax = 0x04; + ecx = cachenum; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + + type = eax & 0x1f; + + if (type == 0) + break; + if (type == 2) + /* Instruction cache */ + continue; + + cache->type = type; + + cache->level = (eax >> 5) & 0x7; + cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1; + infos->max_nbcores = ((eax >> 26) & 0x3f) + 1; + + cache->linesize = linesize = (ebx & 0xfff) + 1; + cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1; + cache->ways = ways = ((ebx >> 22) & 0x3ff) + 1; + cache->sets = sets = ecx + 1; + cache->size = linesize * linepart * ways * sets; + + hwloc_debug("cache %u type %u L%u t%u c%u linesize %u linepart %u ways %u sets %u, size %uKB\n", cachenum, cache->type, cache->level, cache->nbthreads_sharing, infos->max_nbcores, linesize, linepart, ways, sets, cache->size >> 10); + infos->max_nbthreads = infos->max_log_proc / infos->max_nbcores; + hwloc_debug("thus %u threads\n", infos->max_nbthreads); + infos->threadid = infos->logprocid % infos->max_nbthreads; + infos->coreid = infos->logprocid / infos->max_nbthreads; + hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid); + + cache++; + } + } + + if (cpuid_type == intel && highest_cpuid >= 0x0b) { + unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; + for (level = 0; ; level++) { + ecx = level; + eax = 0x0b; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + if (!eax && !ebx) + break; + } + if (level) { + infos->levels = level; + infos->otherids = malloc(level * sizeof(*infos->otherids)); + for (level = 0; ; level++) { + ecx = level; + eax = 0x0b; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + if (!eax && !ebx) + break; + apic_nextshift = eax & 0x1f; + apic_number = ebx & 0xffff; + apic_type = (ecx & 0xff00) >> 8; + apic_id = edx; + id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1); + hwloc_debug("x2APIC %08x %d: nextshift %d num %2d type %d id %2d\n", apic_id, level, apic_nextshift, apic_number, apic_type, id); + infos->apicid = apic_id; + infos->otherids[level] = UINT_MAX; + switch (apic_type) { + case 1: + infos->threadid = id; + break; + case 2: + infos->coreid = id; + break; + default: + hwloc_debug("x2APIC %d: unknown type %d\n", level, apic_type); + infos->otherids[level] = apic_id >> apic_shift; + break; + } + apic_shift = apic_nextshift; + } + infos->socketid = apic_id >> apic_shift; + hwloc_debug("x2APIC remainder: %d\n", infos->socketid); + } else + infos->otherids = NULL; + } else + infos->otherids = NULL; +} + +/* Analyse information stored in infos, and build topology levels accordingly */ +static void summarize(hwloc_topology_t topology, struct procinfo *infos, unsigned nbprocs) +{ + hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc(); + unsigned i, j, l, level; + int one = -1; + + for (i = 0; i < nbprocs; i++) + if (infos[i].present) { + hwloc_bitmap_set(complete_cpuset, i); + one = i; + } + + if (one == -1) + return; + + /* Look for sockets */ + { + hwloc_bitmap_t sockets_cpuset = hwloc_bitmap_dup(complete_cpuset); + hwloc_bitmap_t socket_cpuset; + hwloc_obj_t sock; + + while ((i = hwloc_bitmap_first(sockets_cpuset)) != (unsigned) -1) { + unsigned socketid = infos[i].socketid; + + socket_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].socketid == socketid) { + hwloc_bitmap_set(socket_cpuset, j); + hwloc_bitmap_clr(sockets_cpuset, j); + } + } + sock = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, socketid); + sock->cpuset = socket_cpuset; + hwloc_debug_1arg_bitmap("os socket %u has cpuset %s\n", + socketid, socket_cpuset); + hwloc_insert_object_by_cpuset(topology, sock); + } + hwloc_bitmap_free(sockets_cpuset); + } + + /* Look for unknown objects */ + if (infos[one].otherids) { + for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) { + if (infos[one].otherids[level] != UINT_MAX) { + hwloc_bitmap_t unknowns_cpuset = hwloc_bitmap_dup(complete_cpuset); + hwloc_bitmap_t unknown_cpuset; + hwloc_obj_t unknown; + + while ((i = hwloc_bitmap_first(unknowns_cpuset)) != (unsigned) -1) { + unsigned unknownid = infos[i].otherids[level]; + + unknown_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].otherids[level] == unknownid) { + hwloc_bitmap_set(unknown_cpuset, j); + hwloc_bitmap_clr(unknowns_cpuset, j); + } + } + unknown = hwloc_alloc_setup_object(HWLOC_OBJ_MISC, unknownid); + unknown->cpuset = unknown_cpuset; + unknown->os_level = level; + hwloc_debug_2args_bitmap("os unknown%d %u has cpuset %s\n", + level, unknownid, unknown_cpuset); + hwloc_insert_object_by_cpuset(topology, unknown); + } + hwloc_bitmap_free(unknowns_cpuset); + } + } + } + + /* Look for cores */ + { + hwloc_bitmap_t cores_cpuset = hwloc_bitmap_dup(complete_cpuset); + hwloc_bitmap_t core_cpuset; + hwloc_obj_t core; + + while ((i = hwloc_bitmap_first(cores_cpuset)) != (unsigned) -1) { + unsigned socketid = infos[i].socketid; + unsigned coreid = infos[i].coreid; + + if (coreid == (unsigned) -1) { + hwloc_bitmap_clr(cores_cpuset, i); + continue; + } + + core_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + if (infos[j].coreid == (unsigned) -1) { + hwloc_bitmap_clr(cores_cpuset, j); + continue; + } + + if (infos[j].socketid == socketid && infos[j].coreid == coreid) { + hwloc_bitmap_set(core_cpuset, j); + hwloc_bitmap_clr(cores_cpuset, j); + } + } + core = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, coreid); + core->cpuset = core_cpuset; + hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n", + coreid, core_cpuset); + hwloc_insert_object_by_cpuset(topology, core); + } + hwloc_bitmap_free(cores_cpuset); + } + + /* Look for caches */ + /* First find max level */ + level = 0; + for (i = 0; i < nbprocs; i++) + for (j = 0; j < infos[i].numcaches; j++) + if (infos[i].cache[j].level > level) + level = infos[i].cache[j].level; + + while (level > 0) { + /* Look for caches at level level */ + { + hwloc_bitmap_t caches_cpuset = hwloc_bitmap_dup(complete_cpuset); + hwloc_bitmap_t cache_cpuset; + hwloc_obj_t cache; + + while ((i = hwloc_bitmap_first(caches_cpuset)) != (unsigned) -1) { + unsigned socketid = infos[i].socketid; + + for (l = 0; l < infos[i].numcaches; l++) { + if (infos[i].cache[l].level == level) + break; + } + if (l == infos[i].numcaches) { + /* no cache Llevel in i, odd */ + hwloc_bitmap_clr(caches_cpuset, i); + continue; + } + + { + unsigned cacheid = infos[i].apicid / infos[i].cache[l].nbthreads_sharing; + + cache_cpuset = hwloc_bitmap_alloc(); + for (j = i; j < nbprocs; j++) { + unsigned l2; + for (l2 = 0; l2 < infos[j].numcaches; l2++) { + if (infos[j].cache[l2].level == level) + break; + } + if (l2 == infos[j].numcaches) { + /* no cache Llevel in j, odd */ + hwloc_bitmap_clr(caches_cpuset, j); + continue; + } + if (infos[j].socketid == socketid && infos[j].apicid / infos[j].cache[l2].nbthreads_sharing == cacheid) { + hwloc_bitmap_set(cache_cpuset, j); + hwloc_bitmap_clr(caches_cpuset, j); + } + } + cache = hwloc_alloc_setup_object(HWLOC_OBJ_CACHE, cacheid); + cache->attr->cache.depth = level; + cache->attr->cache.size = infos[i].cache[l].size; + cache->attr->cache.linesize = infos[i].cache[l].linesize; + cache->cpuset = cache_cpuset; + hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n", + level, cacheid, cache_cpuset); + hwloc_insert_object_by_cpuset(topology, cache); + } + } + hwloc_bitmap_free(caches_cpuset); + } + level--; + } + + for (i = 0; i < nbprocs; i++) { + free(infos[i].cache); + if (infos[i].otherids) + free(infos[i].otherids); + } +} + +#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24)) +#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24)) +#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24)) + +#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24)) +#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24)) +#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24)) + +void hwloc_look_x86(struct hwloc_topology *topology, unsigned nbprocs) +{ + /* This function must always be here, but it's ok if it's empty. */ +#if defined(HWLOC_HAVE_CPUID) + unsigned eax, ebx, ecx = 0, edx; + hwloc_bitmap_t orig_cpuset; + unsigned i; + unsigned highest_cpuid; + unsigned highest_ext_cpuid; + struct procinfo *infos = NULL; + enum cpuid_type cpuid_type = unknown; + + if (!hwloc_have_cpuid()) + return; + + infos = malloc(sizeof(struct procinfo) * nbprocs); + if (NULL == infos) { + return; + } + + eax = 0x00; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + highest_cpuid = eax; + if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX) + cpuid_type = intel; + if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX) + cpuid_type = amd; + + hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type); + if (highest_cpuid < 0x01) { + goto free; + } + + eax = 0x80000000; + hwloc_cpuid(&eax, &ebx, &ecx, &edx); + highest_ext_cpuid = eax; + + hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid); + + orig_cpuset = hwloc_bitmap_alloc(); + + if (topology->get_thisthread_cpubind && topology->set_thisthread_cpubind) { + if (!topology->get_thisthread_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) { + hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); + for (i = 0; i < nbprocs; i++) { + hwloc_bitmap_only(cpuset, i); + if (topology->set_thisthread_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT)) + continue; + look_proc(&infos[i], highest_cpuid, highest_ext_cpuid, cpuid_type); + } + hwloc_bitmap_free(cpuset); + topology->set_thisthread_cpubind(topology, orig_cpuset, 0); + hwloc_bitmap_free(orig_cpuset); + summarize(topology, infos, nbprocs); + goto free; + } + } + if (topology->get_thisproc_cpubind && topology->set_thisproc_cpubind) { + if (!topology->get_thisproc_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) { + hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); + for (i = 0; i < nbprocs; i++) { + hwloc_bitmap_only(cpuset, i); + if (topology->set_thisproc_cpubind(topology, cpuset, HWLOC_CPUBIND_STRICT)) + continue; + look_proc(&infos[i], highest_cpuid, highest_ext_cpuid, cpuid_type); + } + hwloc_bitmap_free(cpuset); + topology->set_thisproc_cpubind(topology, orig_cpuset, 0); + hwloc_bitmap_free(orig_cpuset); + summarize(topology, infos, nbprocs); + goto free; + } + } +#endif + + hwloc_add_object_info(topology->levels[0][0], "Backend", "x86"); + + free: + if (NULL != infos) { + free(infos); + } +} |