Diffstat (limited to 'hwloc-1.2.1/include/hwloc/helper.h')
-rw-r--r-- | hwloc-1.2.1/include/hwloc/helper.h | 1083
1 file changed, 1083 insertions, 0 deletions
diff --git a/hwloc-1.2.1/include/hwloc/helper.h b/hwloc-1.2.1/include/hwloc/helper.h
new file mode 100644
index 00000000..45a53849
--- /dev/null
+++ b/hwloc-1.2.1/include/hwloc/helper.h
@@ -0,0 +1,1083 @@
+/*
+ * Copyright © 2009 CNRS
+ * Copyright © 2009-2011 INRIA. All rights reserved.
+ * Copyright © 2009-2011 Université Bordeaux 1
+ * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
+ * See COPYING in top-level directory.
+ */
+
+/** \file
+ * \brief High-level hwloc traversal helpers.
+ */
+
+#ifndef HWLOC_HELPER_H
+#define HWLOC_HELPER_H
+
+#ifndef HWLOC_H
+#error Please include the main hwloc.h instead
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/** \defgroup hwlocality_helper_types Object Type Helpers
+ * @{
+ */
+
+/** \brief Returns the depth of objects of type \p type or below
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically found
+ * inside \p type.
+ */
+static __hwloc_inline int __hwloc_attribute_pure
+hwloc_get_type_or_below_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+
+  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+    return depth;
+
+  /* find the highest existing level with type order >= */
+  for(depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU); ; depth--)
+    if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) < 0)
+      return depth+1;
+
+  /* Shouldn't ever happen, as there is always a SYSTEM level with lower order and known depth. */
+  /* abort(); */
+}
+
+/** \brief Returns the depth of objects of type \p type or above
+ *
+ * If no object of this type is present on the underlying architecture, the
+ * function returns the depth of the first "present" object typically
+ * containing \p type.
+ */
+static __hwloc_inline int __hwloc_attribute_pure
+hwloc_get_type_or_above_depth (hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+
+  if (depth != HWLOC_TYPE_DEPTH_UNKNOWN)
+    return depth;
+
+  /* find the lowest existing level with type order <= */
+  for(depth = 0; ; depth++)
+    if (hwloc_compare_types(hwloc_get_depth_type(topology, depth), type) > 0)
+      return depth-1;
+
+  /* Shouldn't ever happen, as there is always a PU level with higher order and known depth. */
+  /* abort(); */
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_traversal_basic Basic Traversal Helpers
+ * @{
+ */
+
+/** \brief Returns the top object of the topology tree.
+ *
+ * Its type is typically ::HWLOC_OBJ_MACHINE but it could be different
+ * for complex topologies. This function replaces the old deprecated
+ * hwloc_get_system_obj().
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_root_obj (hwloc_topology_t topology)
+{
+  return hwloc_get_obj_by_depth (topology, 0, 0);
+}
+
+/** \brief Returns the ancestor object of \p obj at depth \p depth. */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_ancestor_obj_by_depth (hwloc_topology_t topology __hwloc_attribute_unused, unsigned depth, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj;
+  if (obj->depth < depth)
+    return NULL;
+  while (ancestor && ancestor->depth > depth)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the ancestor object of \p obj with type \p type.
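+ *
+ * For instance, a minimal sketch (assuming the topology actually contains
+ * a socket level above the first PU):
+ * \code
+ * hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
+ * hwloc_obj_t socket = hwloc_get_ancestor_obj_by_type(topology, HWLOC_OBJ_SOCKET, pu);
+ * \endcode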
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_ancestor_obj_by_type (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_type_t type, hwloc_obj_t obj)
+{
+  hwloc_obj_t ancestor = obj->parent;
+  while (ancestor && ancestor->type != type)
+    ancestor = ancestor->parent;
+  return ancestor;
+}
+
+/** \brief Returns the next object at depth \p depth.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_depth (hwloc_topology_t topology, unsigned depth, hwloc_obj_t prev)
+{
+  if (!prev)
+    return hwloc_get_obj_by_depth (topology, depth, 0);
+  if (prev->depth != depth)
+    return NULL;
+  return prev->next_cousin;
+}
+
+/** \brief Returns the next object of type \p type.
+ *
+ * If \p prev is \c NULL, return the first object of type \p type. If
+ * there is no depth or multiple depths for the given type, return \c NULL
+ * and let the caller fall back to hwloc_get_next_obj_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_by_type (hwloc_topology_t topology, hwloc_obj_type_t type,
+                            hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_by_depth (topology, depth, prev);
+}
+
+/** \brief Returns the object of type ::HWLOC_OBJ_PU with \p os_index.
+ *
+ * \note The \p os_index field of objects should most of the time only be
+ * used for pretty-printing purposes. Type ::HWLOC_OBJ_PU is the only case
+ * where \p os_index could actually be useful, when manually binding to
+ * processors.
+ * However, using CPU sets to hide this complexity should often be preferred.
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_pu_obj_by_os_index(hwloc_topology_t topology, unsigned os_index)
+{
+  hwloc_obj_t obj = NULL;
+  while ((obj = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PU, obj)) != NULL)
+    if (obj->os_index == os_index)
+      return obj;
+  return NULL;
+}
+
+/** \brief Return the next child.
+ *
+ * If \p prev is \c NULL, return the first child.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_child (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t parent, hwloc_obj_t prev)
+{
+  if (!prev)
+    return parent->first_child;
+  if (prev->parent != parent)
+    return NULL;
+  return prev->next_sibling;
+}
+
+/** \brief Returns the common ancestor object of objects \p obj1 and \p obj2 */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_common_ancestor_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj1, hwloc_obj_t obj2)
+{
+  /* the loop isn't so easy since intermediate ancestors may have
+   * different depth, causing us to alternate between using obj1->parent
+   * and obj2->parent. Also, even if at some point we find ancestors of
+   * the same depth, their ancestors may have different depth again.
+   */
+  while (obj1 != obj2) {
+    while (obj1->depth > obj2->depth)
+      obj1 = obj1->parent;
+    while (obj2->depth > obj1->depth)
+      obj2 = obj2->parent;
+    if (obj1 != obj2 && obj1->depth == obj2->depth) {
+      obj1 = obj1->parent;
+      obj2 = obj2->parent;
+    }
+  }
+  return obj1;
+}
+
+/** \brief Returns true if \p obj is inside the subtree beginning with \p subtree_root.
+ *
+ * \note This function assumes that both \p obj and \p subtree_root have a \p cpuset.
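+ *
+ * For instance, a minimal sketch (assuming at least one PU and one socket
+ * exist in the topology):
+ * \code
+ * hwloc_obj_t pu = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
+ * hwloc_obj_t socket = hwloc_get_obj_by_type(topology, HWLOC_OBJ_SOCKET, 0);
+ * if (socket && hwloc_obj_is_in_subtree(topology, pu, socket))
+ *   printf("PU #0 lies below socket #0\n");
+ * \endcode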
+ */
+static __hwloc_inline int __hwloc_attribute_pure
+hwloc_obj_is_in_subtree (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, hwloc_obj_t subtree_root)
+{
+  return hwloc_bitmap_isincluded(obj->cpuset, subtree_root->cpuset);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_inside Finding Objects Inside a CPU set
+ * @{
+ */
+
+/** \brief Get the first largest object included in the given cpuset \p set.
+ *
+ * \return the first object that is included in \p set and whose parent is not.
+ *
+ * This is convenient for iterating over all largest objects within a CPU set
+ * by doing a loop getting the first largest object and clearing its CPU set
+ * from the remaining CPU set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_first_largest_obj_inside_cpuset(hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  hwloc_obj_t obj = hwloc_get_root_obj(topology);
+  /* FIXME: what if !root->cpuset? */
+  if (!hwloc_bitmap_intersects(obj->cpuset, set))
+    return NULL;
+  while (!hwloc_bitmap_isincluded(obj->cpuset, set)) {
+    /* while the object intersects without being included, look at its children */
+    hwloc_obj_t child = NULL;
+    while ((child = hwloc_get_next_child(topology, obj, child)) != NULL) {
+      if (child->cpuset && hwloc_bitmap_intersects(child->cpuset, set))
+        break;
+    }
+    if (!child)
+      /* no child intersects, return their parent */
+      return obj;
+    /* found one intersecting child, look at its children */
+    obj = child;
+  }
+  /* obj is included, return it */
+  return obj;
+}
+
+/** \brief Get the set of largest objects covering exactly a given cpuset \p set
+ *
+ * \return the number of objects returned in \p objs.
+ */
+HWLOC_DECLSPEC int hwloc_get_largest_objs_inside_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                                         hwloc_obj_t * __hwloc_restrict objs, int max);
+
+/** \brief Return the next object at depth \p depth included in CPU set \p set.
+ *
+ * If \p prev is \c NULL, return the first object at depth \p depth
+ * included in \p set. The next invocation should pass the previous
+ * return value in \p prev so as to obtain the next object in \p set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                           unsigned depth, hwloc_obj_t prev)
+{
+  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+  /* no need to check next->cpuset because objects in levels always have a cpuset */
+  while (next && !hwloc_bitmap_isincluded(next->cpuset, set))
+    next = next->next_cousin;
+  return next;
+}
+
+/** \brief Return the next object of type \p type included in CPU set \p set.
+ *
+ * If there is no depth or multiple depths for the given type, return \c NULL
+ * and let the caller fall back to
+ * hwloc_get_next_obj_inside_cpuset_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                          hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_inside_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** \brief Return the (logically) \p idx -th object at depth \p depth included in CPU set \p set.
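+ *
+ * For instance, a minimal sketch (fetching the second PU contained in an
+ * existing cpuset \p set, assuming it holds at least two PUs):
+ * \code
+ * int pu_depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_PU);
+ * hwloc_obj_t second_pu = hwloc_get_obj_inside_cpuset_by_depth(topology, set, pu_depth, 1);
+ * \endcode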
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_obj_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                      unsigned depth, unsigned idx)
+{
+  unsigned count = 0;
+  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+  while (obj) {
+    /* no need to check obj->cpuset because objects in levels always have a cpuset */
+    if (hwloc_bitmap_isincluded(obj->cpuset, set)) {
+      if (count == idx)
+        return obj;
+      count++;
+    }
+    obj = obj->next_cousin;
+  }
+  return NULL;
+}
+
+/** \brief Return the \p idx -th object of type \p type included in CPU set \p set.
+ *
+ * If there is no depth or multiple depths for the given type, return \c NULL
+ * and let the caller fall back to
+ * hwloc_get_obj_inside_cpuset_by_depth().
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_obj_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                     hwloc_obj_type_t type, unsigned idx)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_obj_inside_cpuset_by_depth(topology, set, depth, idx);
+}
+
+/** \brief Return the number of objects at depth \p depth included in CPU set \p set. */
+static __hwloc_inline unsigned __hwloc_attribute_pure
+hwloc_get_nbobjs_inside_cpuset_by_depth (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                         unsigned depth)
+{
+  hwloc_obj_t obj = hwloc_get_obj_by_depth (topology, depth, 0);
+  unsigned count = 0;
+  while (obj) {
+    /* no need to check obj->cpuset because objects in levels always have a cpuset */
+    if (hwloc_bitmap_isincluded(obj->cpuset, set))
+      count++;
+    obj = obj->next_cousin;
+  }
+  return count;
+}
+
+/** \brief Return the number of objects of type \p type included in CPU set \p set.
+ *
+ * If no object for that type exists inside CPU set \p set, 0 is
+ * returned. If there are several levels with objects of that type
+ * inside CPU set \p set, -1 is returned.
+ */
+static __hwloc_inline int __hwloc_attribute_pure
+hwloc_get_nbobjs_inside_cpuset_by_type (hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                        hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    return 0;
+  if (depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return -1; /* FIXME: aggregate nbobjs from different levels? */
+  return hwloc_get_nbobjs_inside_cpuset_by_depth(topology, set, depth);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_covering Finding a single Object covering at least a CPU set
+ * @{
+ */
+
+/** \brief Get the child covering at least CPU set \p set.
+ *
+ * \return \c NULL if no child matches or if \p set is empty.
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_child_covering_cpuset (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t set,
+                                 hwloc_obj_t parent)
+{
+  hwloc_obj_t child;
+
+  if (hwloc_bitmap_iszero(set))
+    return NULL;
+
+  child = parent->first_child;
+  while (child) {
+    if (child->cpuset && hwloc_bitmap_isincluded(set, child->cpuset))
+      return child;
+    child = child->next_sibling;
+  }
+  return NULL;
+}
+
+/** \brief Get the lowest object covering at least CPU set \p set
+ *
+ * \return \c NULL if no object matches or if \p set is empty.
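+ *
+ * For instance, a minimal sketch (assuming PUs #0 and #2 exist; the result
+ * is the smallest object whose cpuset contains both):
+ * \code
+ * hwloc_cpuset_t set = hwloc_bitmap_alloc();
+ * hwloc_bitmap_set(set, 0);
+ * hwloc_bitmap_set(set, 2);
+ * hwloc_obj_t obj = hwloc_get_obj_covering_cpuset(topology, set);
+ * hwloc_bitmap_free(set);
+ * \endcode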
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_obj_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  struct hwloc_obj *current = hwloc_get_root_obj(topology);
+
+  if (hwloc_bitmap_iszero(set))
+    return NULL;
+
+  /* FIXME: what if !root->cpuset? */
+  if (!hwloc_bitmap_isincluded(set, current->cpuset))
+    return NULL;
+
+  while (1) {
+    hwloc_obj_t child = hwloc_get_child_covering_cpuset(topology, set, current);
+    if (!child)
+      return current;
+    current = child;
+  }
+}
+
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_coverings Finding a set of similar Objects covering at least a CPU set
+ * @{
+ */
+
+/** \brief Iterate through same-depth objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object at depth \p
+ * depth covering at least part of CPU set \p set. The next
+ * invocation should pass the previous return value in \p prev so as
+ * to obtain the next object covering at least another part of \p set.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_depth(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                            unsigned depth, hwloc_obj_t prev)
+{
+  hwloc_obj_t next = hwloc_get_next_obj_by_depth(topology, depth, prev);
+  /* no need to check next->cpuset because objects in levels always have a cpuset */
+  while (next && !hwloc_bitmap_intersects(set, next->cpuset))
+    next = next->next_cousin;
+  return next;
+}
+
+/** \brief Iterate through same-type objects covering at least CPU set \p set
+ *
+ * If object \p prev is \c NULL, return the first object of type \p
+ * type covering at least part of CPU set \p set. The next invocation
+ * should pass the previous return value in \p prev so as to obtain
+ * the next object of type \p type covering at least another part of
+ * \p set.
+ *
+ * If there are no or multiple depths for type \p type, \c NULL is returned.
+ * The caller may fall back to hwloc_get_next_obj_covering_cpuset_by_depth()
+ * for each depth.
+ */
+static __hwloc_inline hwloc_obj_t
+hwloc_get_next_obj_covering_cpuset_by_type(hwloc_topology_t topology, hwloc_const_cpuset_t set,
+                                           hwloc_obj_type_t type, hwloc_obj_t prev)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN || depth == HWLOC_TYPE_DEPTH_MULTIPLE)
+    return NULL;
+  return hwloc_get_next_obj_covering_cpuset_by_depth(topology, set, depth, prev);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_find_cache Cache-specific Finding Helpers
+ * @{
+ */
+
+/** \brief Get the first cache covering a cpuset \p set
+ *
+ * \return \c NULL if no cache matches
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_cache_covering_cpuset (hwloc_topology_t topology, hwloc_const_cpuset_t set)
+{
+  hwloc_obj_t current = hwloc_get_obj_covering_cpuset(topology, set);
+  while (current) {
+    if (current->type == HWLOC_OBJ_CACHE)
+      return current;
+    current = current->parent;
+  }
+  return NULL;
+}
+
+/** \brief Get the first cache shared between an object and somebody else.
+ *
+ * \return \c NULL if no cache matches or if an invalid object is given.
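+ *
+ * For instance, a minimal sketch (assuming core #0 exists and shares a
+ * cache with other cores):
+ * \code
+ * hwloc_obj_t core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
+ * hwloc_obj_t cache = hwloc_get_shared_cache_covering_obj(topology, core);
+ * if (cache)
+ *   printf("core #0 shares a level-%u cache\n", cache->attr->cache.depth);
+ * \endcode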
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj)
+{
+  hwloc_obj_t current = obj->parent;
+  if (!obj->cpuset)
+    return NULL;
+  while (current && current->cpuset) {
+    if (!hwloc_bitmap_isequal(current->cpuset, obj->cpuset)
+        && current->type == HWLOC_OBJ_CACHE)
+      return current;
+    current = current->parent;
+  }
+  return NULL;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_traversal Advanced Traversal Helpers
+ * @{
+ */
+
+/** \brief Do a depth-first traversal of the topology to find and sort
+ * all objects that are at the same depth as \p src.
+ * Report in \p objs up to \p max physically closest ones to \p src.
+ *
+ * \return the number of objects returned in \p objs.
+ */
+/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
+HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);
+
+/** \brief Find an object below another object, both specified by types and indexes.
+ *
+ * Start from the top system object and find an object of type \p type1
+ * and logical index \p idx1. Then look below this object and find another
+ * object of type \p type2 and logical index \p idx2. Indexes are specified
+ * within the parent, not within the entire system.
+ *
+ * For instance, if type1 is SOCKET, idx1 is 2, type2 is CORE and idx2
+ * is 3, return the fourth core object below the third socket.
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_obj_below_by_type (hwloc_topology_t topology,
+                             hwloc_obj_type_t type1, unsigned idx1,
+                             hwloc_obj_type_t type2, unsigned idx2)
+{
+  hwloc_obj_t obj;
+
+  obj = hwloc_get_obj_by_type (topology, type1, idx1);
+  if (!obj)
+    return NULL;
+
+  return hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, type2, idx2);
+}
+
+/** \brief Find an object below a chain of objects specified by types and indexes.
+ *
+ * This is a generalized version of hwloc_get_obj_below_by_type().
+ *
+ * Arrays \p typev and \p idxv must contain \p nr types and indexes.
+ *
+ * Start from the top system object and walk the arrays \p typev and \p idxv.
+ * For each type and logical index pair in the arrays, look under the previously found
+ * object to find the index-th object of the given type.
+ * Indexes are specified within the parent, not within the entire system.
+ *
+ * For instance, if nr is 3, typev contains NODE, SOCKET and CORE,
+ * and idxv contains 0, 1 and 2, return the third core object below
+ * the second socket below the first NUMA node.
+ */
+static __hwloc_inline hwloc_obj_t __hwloc_attribute_pure
+hwloc_get_obj_below_array_by_type (hwloc_topology_t topology, int nr, hwloc_obj_type_t *typev, unsigned *idxv)
+{
+  hwloc_obj_t obj = hwloc_get_root_obj(topology);
+  int i;
+
+  /* FIXME: what if !root->cpuset? */
+  for(i=0; i<nr; i++) {
+    obj = hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset, typev[i], idxv[i]);
+    if (!obj)
+      return NULL;
+  }
+
+  return obj;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_binding Binding Helpers
+ * @{
+ */
+
+/** \brief Distribute \p n items over the topology under \p root
+ *
+ * Array \p cpuset will be filled with \p n cpusets recursively distributed
+ * linearly over the topology under \p root, down to depth \p until (which can
+ * be INT_MAX to distribute down to the finest level).
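+ *
+ * For instance, a minimal sketch (distributing 4 threads over the whole
+ * machine, one cpuset per thread; each returned set must eventually be freed):
+ * \code
+ * hwloc_cpuset_t sets[4];
+ * unsigned i;
+ * hwloc_distribute(topology, hwloc_get_root_obj(topology), sets, 4, INT_MAX);
+ * for (i = 0; i < 4; i++) {
+ *   hwloc_bitmap_singlify(sets[i]);
+ *   // bind thread i here, e.g. with hwloc_set_cpubind(), then:
+ *   hwloc_bitmap_free(sets[i]);
+ * }
+ * \endcode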
+ *
+ * This is typically useful when an application wants to distribute \p n
+ * threads over a machine, giving each of them as much private cache as
+ * possible and keeping them locally in number order.
+ *
+ * The caller may typically want to also call hwloc_bitmap_singlify()
+ * before binding a thread so that it does not move at all.
+ */
+static __hwloc_inline void
+hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *cpuset, unsigned n, unsigned until);
+static __hwloc_inline void
+hwloc_distribute(hwloc_topology_t topology, hwloc_obj_t root, hwloc_cpuset_t *cpuset, unsigned n, unsigned until)
+{
+  unsigned i;
+
+  /* FIXME: what if !root->cpuset? */
+  if (!root->arity || n == 1 || root->depth >= until) {
+    /* Got to the bottom, we can't split any more, put everything there. */
+    for (i=0; i<n; i++)
+      cpuset[i] = hwloc_bitmap_dup(root->cpuset);
+    return;
+  }
+
+  hwloc_distributev(topology, root->children, root->arity, cpuset, n, until);
+}
+
+/** \brief Distribute \p n items over the topology under \p roots
+ *
+ * This is the same as hwloc_distribute(), but takes an array of roots instead
+ * of just one root.
+ */
+static __hwloc_inline void
+hwloc_distributev(hwloc_topology_t topology, hwloc_obj_t *roots, unsigned n_roots, hwloc_cpuset_t *cpuset, unsigned n, unsigned until)
+{
+  unsigned i;
+  unsigned tot_weight;
+  hwloc_cpuset_t *cpusetp = cpuset;
+
+  tot_weight = 0;
+  for (i = 0; i < n_roots; i++)
+    if (roots[i]->cpuset)
+      tot_weight += hwloc_bitmap_weight(roots[i]->cpuset);
+
+  for (i = 0; i < n_roots && tot_weight; i++) {
+    /* Give to roots[i] a portion proportional to its weight */
+    unsigned weight = roots[i]->cpuset ? hwloc_bitmap_weight(roots[i]->cpuset) : 0;
+    unsigned chunk = (n * weight + tot_weight-1) / tot_weight;
+    hwloc_distribute(topology, roots[i], cpusetp, chunk, until);
+    cpusetp += chunk;
+    tot_weight -= weight;
+    n -= chunk;
+  }
+}
+
+/** \brief Allocate some memory on the given nodeset \p nodeset
+ *
+ * This is similar to hwloc_alloc_membind_nodeset() except that it is allowed
+ * to change the current memory binding policy, thus providing more binding
+ * support, at the expense of changing the current state.
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy_nodeset(hwloc_topology_t topology, size_t len, hwloc_const_nodeset_t nodeset, hwloc_membind_policy_t policy, int flags)
+{
+  void *p = hwloc_alloc_membind_nodeset(topology, len, nodeset, policy, flags);
+  if (p)
+    return p;
+  hwloc_set_membind_nodeset(topology, nodeset, policy, flags);
+  p = hwloc_alloc(topology, len);
+  if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH)
+    /* Enforce the binding by touching the data */
+    memset(p, 0, len);
+  return p;
+}
+
+/** \brief Allocate some memory on the memory nodes near the given cpuset \p cpuset
+ *
+ * This is similar to hwloc_alloc_membind_policy_nodeset(), but for a given cpuset.
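+ *
+ * For instance, a minimal sketch (allocating one page near the CPUs of the
+ * first NUMA node, assuming one exists):
+ * \code
+ * hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NODE, 0);
+ * void *buf = hwloc_alloc_membind_policy(topology, 4096, node->cpuset,
+ *                                        HWLOC_MEMBIND_BIND, 0);
+ * // ... use buf ...
+ * \endcode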
+ */
+static __hwloc_inline void *
+hwloc_alloc_membind_policy(hwloc_topology_t topology, size_t len, hwloc_const_cpuset_t cpuset, hwloc_membind_policy_t policy, int flags)
+{
+  void *p = hwloc_alloc_membind(topology, len, cpuset, policy, flags);
+  if (p)
+    return p;
+  hwloc_set_membind(topology, cpuset, policy, flags);
+  p = hwloc_alloc(topology, len);
+  if (p && policy != HWLOC_MEMBIND_FIRSTTOUCH)
+    /* Enforce the binding by touching the data */
+    memset(p, 0, len);
+  return p;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_cpuset Cpuset Helpers
+ * @{
+ */
+/** \brief Get complete CPU set
+ *
+ * \return the complete CPU set of logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t __hwloc_attribute_pure
+hwloc_topology_get_complete_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->complete_cpuset;
+}
+
+/** \brief Get topology CPU set
+ *
+ * \return the CPU set of logical processors of the system for which hwloc
+ * provides topology information. This is equivalent to the cpuset of the
+ * system object. If the topology is the result of a combination of several
+ * systems, NULL is returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t __hwloc_attribute_pure
+hwloc_topology_get_topology_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->cpuset;
+}
+
+/** \brief Get online CPU set
+ *
+ * \return the CPU set of online logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t __hwloc_attribute_pure
+hwloc_topology_get_online_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->online_cpuset;
+}
+
+/** \brief Get allowed CPU set
+ *
+ * \return the CPU set of allowed logical processors of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned cpuset is not newly allocated and should thus not be
+ * changed or freed; hwloc_cpuset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_cpuset_t __hwloc_attribute_pure
+hwloc_topology_get_allowed_cpuset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->allowed_cpuset;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_nodeset Nodeset Helpers
+ * @{
+ */
+/** \brief Get complete node set
+ *
+ * \return the complete node set of memory of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_nodeset_dup must be used to obtain a local copy.
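+ *
+ * For instance, a minimal sketch (taking a modifiable copy):
+ * \code
+ * hwloc_nodeset_t copy = hwloc_bitmap_dup(hwloc_topology_get_complete_nodeset(topology));
+ * // ... modify and use copy ...
+ * hwloc_bitmap_free(copy);
+ * \endcode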
+ */
+static __hwloc_inline hwloc_const_nodeset_t __hwloc_attribute_pure
+hwloc_topology_get_complete_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->complete_nodeset;
+}
+
+/** \brief Get topology node set
+ *
+ * \return the node set of memory of the system for which hwloc
+ * provides topology information. This is equivalent to the nodeset of the
+ * system object. If the topology is the result of a combination of several
+ * systems, NULL is returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_nodeset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t __hwloc_attribute_pure
+hwloc_topology_get_topology_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->nodeset;
+}
+
+/** \brief Get allowed node set
+ *
+ * \return the node set of allowed memory of the system. If the
+ * topology is the result of a combination of several systems, NULL is
+ * returned.
+ *
+ * \note The returned nodeset is not newly allocated and should thus not be
+ * changed or freed; hwloc_nodeset_dup must be used to obtain a local copy.
+ */
+static __hwloc_inline hwloc_const_nodeset_t __hwloc_attribute_pure
+hwloc_topology_get_allowed_nodeset(hwloc_topology_t topology)
+{
+  return hwloc_get_root_obj(topology)->allowed_nodeset;
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_helper_nodeset_convert Conversion between cpuset and nodeset
+ *
+ * There are two semantics for converting cpusets to nodesets depending on how
+ * non-NUMA machines are handled.
+ *
+ * When manipulating nodesets for memory binding, non-NUMA machines should be
+ * considered as having a single NUMA node. The standard conversion routines
+ * below should be used so that marking the first bit of the nodeset means
+ * that memory should be bound to the whole non-NUMA machine.
+ *
+ * When manipulating nodesets as an actual list of NUMA nodes without any
+ * need to handle memory binding on non-NUMA machines, the strict conversion
+ * routines may be used instead.
+ * @{
+ */
+
+/** \brief Convert a CPU set into a NUMA node set and handle non-NUMA cases
+ *
+ * If some NUMA nodes have no CPUs at all, this function never sets their
+ * indexes in the output node set, even if a full CPU set is given as input.
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p cpuset is empty, \p nodeset will be emptied as well.
+ * Otherwise \p nodeset will be entirely filled.
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, hwloc_nodeset_t nodeset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+  hwloc_obj_t obj;
+
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+    if (hwloc_bitmap_iszero(cpuset))
+      hwloc_bitmap_zero(nodeset);
+    else
+      /* Assume the whole system */
+      hwloc_bitmap_fill(nodeset);
+    return;
+  }
+
+  hwloc_bitmap_zero(nodeset);
+  obj = NULL;
+  while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, obj)) != NULL)
+    hwloc_bitmap_set(nodeset, obj->os_index);
+}
+
+/** \brief Convert a CPU set into a NUMA node set without handling non-NUMA cases
+ *
+ * This is the strict variant of ::hwloc_cpuset_to_nodeset. It does not fix
+ * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
+ * the same. However, if the topology contains no NUMA nodes, return an empty
+ * nodeset.
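+ *
+ * For instance, a minimal sketch (converting the cpuset of the first core
+ * into the corresponding node set, assuming such a core exists):
+ * \code
+ * hwloc_obj_t core = hwloc_get_obj_by_type(topology, HWLOC_OBJ_CORE, 0);
+ * hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+ * hwloc_cpuset_to_nodeset_strict(topology, core->cpuset, nodeset);
+ * // ... use nodeset, e.g. for memory binding, then:
+ * hwloc_bitmap_free(nodeset);
+ * \endcode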
+ */
+static __hwloc_inline void
+hwloc_cpuset_to_nodeset_strict(struct hwloc_topology *topology, hwloc_const_cpuset_t cpuset, hwloc_nodeset_t nodeset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+  hwloc_obj_t obj;
+  hwloc_bitmap_zero(nodeset);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    /* no NUMA nodes: leave the nodeset empty, as documented */
+    return;
+  obj = NULL;
+  while ((obj = hwloc_get_next_obj_covering_cpuset_by_depth(topology, cpuset, depth, obj)) != NULL)
+    hwloc_bitmap_set(nodeset, obj->os_index);
+}
+
+/** \brief Convert a NUMA node set into a CPU set and handle non-NUMA cases
+ *
+ * If the topology contains no NUMA nodes, the machine is considered
+ * as a single memory node, and the following behavior is used:
+ * If \p nodeset is empty, \p cpuset will be emptied as well.
+ * Otherwise \p cpuset will be entirely filled.
+ * This is useful for manipulating memory binding sets.
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_const_nodeset_t nodeset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+  hwloc_obj_t obj;
+
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
+    if (hwloc_bitmap_iszero(nodeset))
+      hwloc_bitmap_zero(cpuset);
+    else
+      /* Assume the whole system */
+      hwloc_bitmap_fill(cpuset);
+    return;
+  }
+
+  hwloc_bitmap_zero(cpuset);
+  obj = NULL;
+  while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL) {
+    if (hwloc_bitmap_isset(nodeset, obj->os_index))
+      /* no need to check obj->cpuset because objects in levels always have a cpuset */
+      hwloc_bitmap_or(cpuset, cpuset, obj->cpuset);
+  }
+}
+
+/** \brief Convert a NUMA node set into a CPU set without handling non-NUMA cases
+ *
+ * This is the strict variant of ::hwloc_cpuset_from_nodeset. It does not fix
+ * non-NUMA cases. If the topology contains some NUMA nodes, behave exactly
+ * the same. However, if the topology contains no NUMA nodes, return an empty
+ * cpuset.
+ */
+static __hwloc_inline void
+hwloc_cpuset_from_nodeset_strict(struct hwloc_topology *topology, hwloc_cpuset_t cpuset, hwloc_const_nodeset_t nodeset)
+{
+  int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+  hwloc_obj_t obj;
+  hwloc_bitmap_zero(cpuset);
+  if (depth == HWLOC_TYPE_DEPTH_UNKNOWN)
+    /* no NUMA nodes: leave the cpuset empty, as documented */
+    return;
+  obj = NULL;
+  while ((obj = hwloc_get_next_obj_by_depth(topology, depth, obj)) != NULL)
+    if (hwloc_bitmap_isset(nodeset, obj->os_index))
+      /* no need to check obj->cpuset because objects in levels always have a cpuset */
+      hwloc_bitmap_or(cpuset, cpuset, obj->cpuset);
+}
+
+/** @} */
+
+
+
+/** \defgroup hwlocality_distances Distances
+ * @{
+ */
+
+/** \brief Get the distances between all objects at the given depth.
+ *
+ * \return a distances structure containing a matrix with all distances
+ * between all objects at the given depth.
+ *
+ * Slot i+nbobjs*j contains the distance from the object of logical index i
+ * to the object of logical index j.
+ *
+ * \note This function only returns matrices covering the whole topology,
+ * without any unknown distance value. Those matrices are available in the
+ * top-level object of the hierarchy. Matrices of lower objects are not
+ * reported here since they cover only part of the machine.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ *
+ * \return \c NULL if no such distance matrix exists.
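+ *
+ * For instance, a minimal sketch (printing the NUMA node distance matrix,
+ * assuming the topology provides one at that depth):
+ * \code
+ * int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE);
+ * if (depth >= 0) {
+ *   const struct hwloc_distances_s *d =
+ *     hwloc_get_whole_distance_matrix_by_depth(topology, depth);
+ *   unsigned i, j;
+ *   if (d)
+ *     for (i = 0; i < d->nbobjs; i++)
+ *       for (j = 0; j < d->nbobjs; j++)
+ *         printf("%u -> %u: %f\n", i, j, d->latency[i + d->nbobjs * j]);
+ * }
+ * \endcode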
+ */
+
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_whole_distance_matrix_by_depth(hwloc_topology_t topology, unsigned depth)
+{
+  hwloc_obj_t root = hwloc_get_root_obj(topology);
+  unsigned i;
+  for(i=0; i<root->distances_count; i++)
+    if (root->distances[i]->relative_depth == depth)
+      return root->distances[i];
+  return NULL;
+}
+
+/** \brief Get the distances between all objects of a given type.
+ *
+ * \return a distances structure containing a matrix with all distances
+ * between all objects of the given type.
+ *
+ * Slot i+nbobjs*j contains the distance from the object of logical index i
+ * to the object of logical index j.
+ *
+ * \note This function only returns matrices covering the whole topology,
+ * without any unknown distance value. Those matrices are available in the
+ * top-level object of the hierarchy. Matrices of lower objects are not
+ * reported here since they cover only part of the machine.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ *
+ * \return \c NULL if no such distance matrix exists.
+ */
+
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_whole_distance_matrix_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
+{
+  int depth = hwloc_get_type_depth(topology, type);
+  if (depth < 0)
+    return NULL;
+  return hwloc_get_whole_distance_matrix_by_depth(topology, depth);
+}
+
+/** \brief Get distances for the given depth and covering some objects
+ *
+ * Return a distance matrix that describes depth \p depth and covers at
+ * least object \p obj and all its ancestors.
+ *
+ * When looking for the distance between some objects, a common ancestor should
+ * be passed in \p obj.
+ *
+ * \p firstp is set to the logical index of the first object described by the matrix.
+ *
+ * The returned structure belongs to the hwloc library. The caller should
+ * not modify or free it.
+ */
+static __hwloc_inline const struct hwloc_distances_s *
+hwloc_get_distance_matrix_covering_obj_by_depth(hwloc_topology_t topology,
+                                                hwloc_obj_t obj, unsigned depth,
+                                                unsigned *firstp)
+{
+  while (obj && obj->cpuset) {
+    unsigned i;
+    for(i=0; i<obj->distances_count; i++)
+      if (obj->distances[i]->relative_depth == depth - obj->depth) {
+        if (!obj->distances[i]->nbobjs)
+          continue;
+        *firstp = hwloc_get_next_obj_inside_cpuset_by_depth(topology, obj->cpuset, depth, NULL)->logical_index;
+        return obj->distances[i];
+      }
+    obj = obj->parent;
+  }
+  return NULL;
+}