author | Costa Tsaousis <costa@netdata.cloud> | 2023-02-02 00:14:35 +0200
---|---|---
committer | GitHub <noreply@github.com> | 2023-02-02 00:14:35 +0200
commit | 55d1f00bb7c2403b451947b2a225b5d1f6be9183 (patch) |
tree | 043e57edb64b319b1eb6a883d6980fa2d9dd2c8e /libnetdata/aral |
parent | 2e56e2b87622a102aef876d297a3cd80d35028e5 (diff) |
DBENGINE v2 - improvements part 12 (#14379)
* parallel initialization of tiers
* do not spawn multiple dbengine event loops
* user configurable dbengine parallel initialization
* size netdata based on the real CPU cores available on the system netdata runs on, not on the system being monitored
* user configurable system cpus
* move cpuset parsing to os.c/.h
* fix replication of misaligned chart dimensions
* give a different path to each tier thread
* statically allocate the path into the initialization structure
* use aral for reusing dbengine pages
* dictionaries use ARAL for fixed-size values
* fix compilation without internal checks
* journal v2 index uses aral
* test to see judy allocations
* judy allocations using aral
* Add config option to select if dbengine will use direct I/O (default is yes)
* V1 journal files will use uv_fs_read instead of mmap (respecting the direct I/O setting)
* Remove sqlite3IsMemdb as it is unused
* Fix compilation error when --disable-dbengine is used
* use aral for dbengine work_cmds
* changed aral API to support new features (see the `aral_create()` sketch after this message)
* pgc and mrg aral overheads
* rrdeng opcodes using aral
* better structuring and naming
* dbengine query handles using aral
* page descriptors using aral
* remove obsolete linking
* extent io descriptors using aral
* aral keeps one last page alive
* add missing return value
* added judy aral overhead
* pdc now uses aral
* page_details now use aral
* epdl and deol using aral - make sure ARALs are initialized before spawning the event loop
* remove unused linking
* pgc now uses one aral per partition
* aral measure maximum allocation queue
* aral to allocate pages in parallel
* aral parallel pages allocation when needed
* aral cleanup
* track page allocation and page population separately
---------
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
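
For context, the headline API change in this part of the series is the new `aral_create()` prototype (see the `aral.h` hunk at the end of the diff): the page limit is now expressed as a maximum page size in bytes rather than a maximum element count, and an optional `struct aral_statistics *` lets several arrays share one statistics block. A minimal sketch of the new call, assuming the `aral_mallocz()`/`aral_freez()` wrappers that exist elsewhere in libnetdata and a hypothetical `my_object` type:

```c
#include "libnetdata/libnetdata.h"

// Hypothetical element type, for illustration only.
struct my_object {
    int id;
    char name[32];
};

// Passing the same statistics block to several arrays aggregates their accounting.
static struct aral_statistics my_stats = {};

void example(void) {
    ARAL *ar = aral_create("my-objects",
                           sizeof(struct my_object),
                           0,          // initial page elements (0 = let ARAL decide)
                           65536,      // max page size in BYTES (was max elements before this PR)
                           &my_stats,  // shared stats; NULL makes ARAL allocate a private block
                           NULL, NULL, // no mmap filename / cache dir
                           false,      // mmap disabled
                           false);     // locking enabled

    struct my_object *o = aral_mallocz(ar); // allocate one fixed-size element
    o->id = 1;
    aral_freez(ar, o);                      // return it to the array

    fprintf(stderr, "aral overhead: %zu bytes\n", aral_overhead(ar));
    aral_destroy(ar);
}
```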
Diffstat (limited to 'libnetdata/aral')
-rw-r--r-- | libnetdata/aral/aral.c | 407
-rw-r--r-- | libnetdata/aral/aral.h | 36
2 files changed, 319 insertions, 124 deletions
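
Beyond the statistics rework, the diff below also adds a process-wide "aral by size" cache (the mechanism behind the "dictionaries use ARAL for fixed-size values" item): `aral_by_size_acquire(size)` returns a reference-counted array shared by all users of that element size, and `aral_by_size_release()` drops the reference, destroying the array when the count reaches zero. A hedged usage sketch, again with a hypothetical element type and the assumed `aral_mallocz()`/`aral_freez()` wrappers:

```c
// Hypothetical fixed-size value stored through the shared per-size arrays.
typedef struct {
    uint64_t key;
    uint64_t value;
} entry_t;

void example_by_size(void) {
    // All callers asking for sizeof(entry_t) get the same refcounted ARAL,
    // and its memory is accounted in the shared aral-by-size statistics.
    ARAL *ar = aral_by_size_acquire(sizeof(entry_t));

    entry_t *e = aral_mallocz(ar);
    e->key = 1;
    e->value = 2;
    aral_freez(ar, e);

    aral_by_size_release(ar); // destroys the shared array once the last user releases it
}
```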
```diff
diff --git a/libnetdata/aral/aral.c b/libnetdata/aral/aral.c
index 8ea4f64624..4505ee0f28 100644
--- a/libnetdata/aral/aral.c
+++ b/libnetdata/aral/aral.c
@@ -35,6 +35,9 @@ typedef struct aral_page {
     struct {
         uint32_t used_elements;         // the number of used elements on this page
         uint32_t free_elements;         // the number of free elements on this page
+
+        struct aral_page *prev;         // the prev page on the list
+        struct aral_page *next;         // the next page on the list
     } aral_lock;
 
     struct {
@@ -42,25 +45,29 @@ typedef struct aral_page {
         ARAL_FREE *list;
     } free;
 
-    struct aral_page *prev; // the prev page on the list
-    struct aral_page *next; // the next page on the list
 } ARAL_PAGE;
 
+typedef enum {
+    ARAL_LOCKLESS = (1 << 0),
+    ARAL_DEFRAGMENT = (1 << 1),
+    ARAL_ALLOCATED_STATS = (1 << 2),
+} ARAL_OPTIONS;
+
 struct aral {
     struct {
         char name[ARAL_MAX_NAME + 1];
 
-        bool lockless;
-        bool defragment;
+        ARAL_OPTIONS options;
 
         size_t element_size;            // calculated to take into account ARAL overheads
-        size_t max_allocation_size;     // calculated in bytes
+        size_t max_allocation_size;     // calculated in bytes
+        size_t max_page_elements;       // calculated
         size_t page_ptr_offset;         // calculated
         size_t natural_page_size;       // calculated
 
-        size_t requested_element_size;
         size_t initial_page_elements;
-        size_t max_page_elements;
+        size_t requested_element_size;
+        size_t requested_max_page_size;
 
         struct {
             bool enabled;
@@ -82,40 +89,36 @@ struct aral {
 
     struct {
         SPINLOCK spinlock;
-        size_t allocation_size;         // current allocation size
+        size_t allocating_elements;     // currently allocating elements
+        size_t allocation_size;         // current / next allocation size
     } adders;
 
     struct {
+        size_t allocators;              // the number of threads currently trying to allocate memory
     } atomic;
+
+    struct aral_statistics *stats;
 };
 
-struct {
-    struct {
-        struct {
-            size_t allocations;
-            size_t allocated;
-        } structures;
+size_t aral_structures_from_stats(struct aral_statistics *stats) {
+    return __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED);
+}
 
-        struct {
-            size_t allocations;
-            size_t allocated;
-            size_t used;
-        } malloc;
+size_t aral_overhead_from_stats(struct aral_statistics *stats) {
+    return __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED) -
+           __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED);
+}
 
-        struct {
-            size_t allocations;
-            size_t allocated;
-            size_t used;
-        } mmap;
-    } atomic;
-} aral_globals = {};
+size_t aral_overhead(ARAL *ar) {
+    return aral_overhead_from_stats(ar->stats);
+}
+
+size_t aral_structures(ARAL *ar) {
+    return aral_structures_from_stats(ar->stats);
+}
 
-void aral_get_size_statistics(size_t *structures, size_t *malloc_allocated, size_t *malloc_used, size_t *mmap_allocated, size_t *mmap_used) {
-    *structures = __atomic_load_n(&aral_globals.atomic.structures.allocated, __ATOMIC_RELAXED);
-    *malloc_allocated = __atomic_load_n(&aral_globals.atomic.malloc.allocated, __ATOMIC_RELAXED);
-    *malloc_used = __atomic_load_n(&aral_globals.atomic.malloc.used, __ATOMIC_RELAXED);
-    *mmap_allocated = __atomic_load_n(&aral_globals.atomic.mmap.allocated, __ATOMIC_RELAXED);
-    *mmap_used = __atomic_load_n(&aral_globals.atomic.mmap.used, __ATOMIC_RELAXED);
+struct aral_statistics *aral_statistics(ARAL *ar) {
+    return ar->stats;
 }
 
 #define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2)
@@ -137,15 +140,42 @@ static size_t aral_align_alloc_size(ARAL *ar, uint64_t size) {
 }
 
 static inline void aral_lock(ARAL *ar) {
-    if(likely(!ar->config.lockless))
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
         netdata_spinlock_lock(&ar->aral_lock.spinlock);
 }
 
 static inline void aral_unlock(ARAL *ar) {
-    if(likely(!ar->config.lockless))
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
         netdata_spinlock_unlock(&ar->aral_lock.spinlock);
 }
 
+static inline void aral_page_free_lock(ARAL *ar, ARAL_PAGE *page) {
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
+        netdata_spinlock_lock(&page->free.spinlock);
+}
+
+static inline void aral_page_free_unlock(ARAL *ar, ARAL_PAGE *page) {
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
+        netdata_spinlock_unlock(&page->free.spinlock);
+}
+
+static inline bool aral_adders_trylock(ARAL *ar) {
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
+        return netdata_spinlock_trylock(&ar->adders.spinlock);
+
+    return true;
+}
+
+static inline void aral_adders_lock(ARAL *ar) {
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
+        netdata_spinlock_lock(&ar->adders.spinlock);
+}
+
+static inline void aral_adders_unlock(ARAL *ar) {
+    if(likely(!(ar->config.options & ARAL_LOCKLESS)))
+        netdata_spinlock_unlock(&ar->adders.spinlock);
+}
+
 static void aral_delete_leftover_files(const char *name, const char *path, const char *required_prefix) {
     DIR *dir = opendir(path);
     if(!dir) return;
@@ -197,7 +227,7 @@ static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void
     uintptr_t seeking = (uintptr_t)ptr;
     ARAL_PAGE *page;
 
-    for(page = ar->aral_lock.pages; page ; page = page->next) {
+    for(page = ar->aral_lock.pages; page ; page = page->aral_lock.next) {
         if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size))
             break;
     }
@@ -230,23 +260,29 @@ static inline ARAL_PAGE *find_page_with_free_slots_internal_check___with_aral_lo
 }
 #endif
 
-static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
-    ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE));
-    netdata_spinlock_init(&page->free.spinlock);
-    page->size = ar->adders.allocation_size;
+size_t aral_next_allocation_size___adders_lock_needed(ARAL *ar) {
+    size_t size = ar->adders.allocation_size;
 
-    if(page->size > ar->config.max_allocation_size)
-        page->size = ar->config.max_allocation_size;
+    if(size > ar->config.max_allocation_size)
+        size = ar->config.max_allocation_size;
     else
-        ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->adders.allocation_size * 4 / 3);
+        ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->adders.allocation_size * 2);
 
-    page->max_elements = page->aral_lock.free_elements = page->size / ar->config.element_size;
+    return size;
+}
+
+static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
+    ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE));
+    netdata_spinlock_init(&page->free.spinlock);
+    page->size = size;
+    page->max_elements = page->size / ar->config.element_size;
+    page->aral_lock.free_elements = page->max_elements;
     page->free_elements_to_move_first = page->max_elements / 4;
     if(unlikely(page->free_elements_to_move_first < 1))
         page->free_elements_to_move_first = 1;
 
-    __atomic_add_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
-    __atomic_add_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
+    __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
+    __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
 
     if(unlikely(ar->config.mmap.enabled)) {
         ar->aral_lock.file_number++;
@@ -257,8 +293,8 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar TRACE_ALLOCATIONS_F
         if (unlikely(!page->data))
             fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'",
                   ar->config.name, page->size, page->filename);
-        __atomic_add_fetch(&aral_globals.atomic.mmap.allocations, 1, __ATOMIC_RELAXED);
-        __atomic_add_fetch(&aral_globals.atomic.mmap.allocated, page->size, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED);
     }
     else {
 #ifdef NETDATA_TRACE_ALLOCATIONS
@@ -266,8 +302,8 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar TRACE_ALLOCATIONS_F
 #else
         page->data = mallocz(page->size);
 #endif
-        __atomic_add_fetch(&aral_globals.atomic.malloc.allocations, 1, __ATOMIC_RELAXED);
-        __atomic_add_fetch(&aral_globals.atomic.malloc.allocated, page->size, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED);
     }
 
     // link the free space to its page
@@ -292,8 +328,8 @@ void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_
 
         freez((void *)page->filename);
 
-        __atomic_sub_fetch(&aral_globals.atomic.mmap.allocations, 1, __ATOMIC_RELAXED);
-        __atomic_sub_fetch(&aral_globals.atomic.mmap.allocated, page->size, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED);
     }
     else {
 #ifdef NETDATA_TRACE_ALLOCATIONS
@@ -301,14 +337,14 @@ void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_
 #else
         freez(page->data);
 #endif
-        __atomic_sub_fetch(&aral_globals.atomic.malloc.allocations, 1, __ATOMIC_RELAXED);
-        __atomic_sub_fetch(&aral_globals.atomic.malloc.allocated, page->size, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED);
     }
 
     freez(page);
 
-    __atomic_sub_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
-    __atomic_sub_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
+    __atomic_sub_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
+    __atomic_sub_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED);
 }
 
 static inline void aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) {
@@ -319,23 +355,24 @@ static inline void aral_insert_not_linked_page_with_free_items_to_proper_positio
        !first->aral_lock.free_elements ||
        page->aral_lock.free_elements <= first->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST) {
         // first position
-        DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+        DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
     }
     else {
-        ARAL_PAGE *second = first->next;
+        ARAL_PAGE *second = first->aral_lock.next;
 
         if (!second ||
             !second->aral_lock.free_elements ||
             page->aral_lock.free_elements <= second->aral_lock.free_elements)
             // second position
-            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, first, page, prev, next);
+            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, first, page, aral_lock.prev, aral_lock.next);
         else
             // third position
-            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, second, page, prev, next);
+            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, second, page, aral_lock.prev, aral_lock.next);
     }
 }
 
 static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) {
+    __atomic_add_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED);
     aral_lock(ar);
 
     ARAL_PAGE *page = ar->aral_lock.pages;
@@ -346,34 +383,51 @@ static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUN
 #endif
         aral_unlock(ar);
 
-        if(netdata_spinlock_trylock(&ar->adders.spinlock)) {
-            page = aral_create_page___no_lock_needed(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
+        if(aral_adders_trylock(ar)) {
+            if(ar->adders.allocating_elements < __atomic_load_n(&ar->atomic.allocators, __ATOMIC_RELAXED)) {
 
-            aral_lock(ar);
-            aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
-            netdata_spinlock_unlock(&ar->adders.spinlock);
-            break;
-        }
-        else {
-            aral_lock(ar);
-            page = ar->aral_lock.pages;
+                size_t size = aral_next_allocation_size___adders_lock_needed(ar);
+                ar->adders.allocating_elements += size / ar->config.element_size;
+                aral_adders_unlock(ar);
+
+                page = aral_create_page___no_lock_needed(ar, size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
+
+                aral_lock(ar);
+                aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
+
+                aral_adders_lock(ar);
+                ar->adders.allocating_elements -= size / ar->config.element_size;
+                aral_adders_unlock(ar);
+
+                // we have a page that is all empty
+                // and only aral_lock() is held, so
+                // break the loop
+                break;
+            }
+
+            aral_adders_unlock(ar);
         }
+
+        aral_lock(ar);
+        page = ar->aral_lock.pages;
     }
 
+    __atomic_sub_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED);
+
     // we have a page
     // and aral locked
 
     {
         ARAL_PAGE *first = ar->aral_lock.pages;
-        ARAL_PAGE *second = first->next;
+        ARAL_PAGE *second = first->aral_lock.next;
 
         if (!second ||
             !second->aral_lock.free_elements ||
            first->aral_lock.free_elements <= second->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST)
             page = first;
         else {
-            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, second, prev, next);
-            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, second, prev, next);
+            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next);
+            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next);
             page = second;
         }
     }
@@ -401,8 +455,8 @@ static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUN
         // we are done with this page
         // move the full page last
         // so that pages with free items remain first in the list
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
-        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
     }
 
     aral_unlock(ar);
@@ -414,7 +468,7 @@ void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAM
 
     ARAL_PAGE *page = aral_acquire_a_free_slot(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
 
-    netdata_spinlock_lock(&page->free.spinlock);
+    aral_page_free_lock(ar, page);
 
     internal_fatal(!page->free.list,
                    "ARAL: '%s' free item to use, cannot be NULL.", ar->config.name);
@@ -445,7 +499,7 @@ void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAM
         aral_free_validate_internal_check(ar, fr);
     }
 
-    netdata_spinlock_unlock(&page->free.spinlock);
+    aral_page_free_unlock(ar, page);
 
     // put the page pointer after the element
     uint8_t *data = (uint8_t *)found_fr;
@@ -453,9 +507,9 @@ void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAM
     *page_ptr = page;
 
     if(unlikely(ar->config.mmap.enabled))
-        __atomic_add_fetch(&aral_globals.atomic.mmap.used, ar->config.element_size, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
     else
-        __atomic_add_fetch(&aral_globals.atomic.malloc.used, ar->config.element_size, __ATOMIC_RELAXED);
+        __atomic_add_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
 
     return (void *)found_fr;
 }
@@ -503,35 +557,35 @@ static void aral_defrag_sorted_page_position___aral_lock_needed(ARAL *ar, ARAL_P
     int action = 0; (void)action;
     size_t move_later = 0, move_earlier = 0;
 
-    for(tmp = page->next ;
+    for(tmp = page->aral_lock.next ;
         tmp && tmp->aral_lock.free_elements && tmp->aral_lock.free_elements < page->aral_lock.free_elements ;
-        tmp = tmp->next)
+        tmp = tmp->aral_lock.next)
         move_later++;
 
-    if(!tmp && page->next) {
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
-        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+    if(!tmp && page->aral_lock.next) {
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
         action = 1;
     }
-    else if(tmp != page->next) {
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
-        DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(ar->aral_lock.pages, tmp, page, prev, next);
+    else if(tmp != page->aral_lock.next) {
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+        DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next);
         action = 2;
     }
     else {
-        for(tmp = (page == ar->aral_lock.pages) ? NULL : page->prev ;
+        for(tmp = (page == ar->aral_lock.pages) ? NULL : page->aral_lock.prev ;
             tmp && (!tmp->aral_lock.free_elements || tmp->aral_lock.free_elements > page->aral_lock.free_elements);
-            tmp = (tmp == ar->aral_lock.pages) ? NULL : tmp->prev)
+            tmp = (tmp == ar->aral_lock.pages) ? NULL : tmp->aral_lock.prev)
             move_earlier++;
 
         if(!tmp) {
-            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
-            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+            DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
             action = 3;
         }
-        else if(tmp != page->prev){
-            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
-            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, tmp, page, prev, next);
+        else if(tmp != page->aral_lock.prev){
+            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+            DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next);
            action = 4;
         }
     }
@@ -539,10 +593,10 @@ static void aral_defrag_sorted_page_position___aral_lock_needed(ARAL *ar, ARAL_P
     ar->aral_lock.defragment_operations++;
     ar->aral_lock.defragment_linked_list_traversals += move_earlier + move_later;
 
-    internal_fatal(page->next && page->next->aral_lock.free_elements && page->next->aral_lock.free_elements < page->aral_lock.free_elements,
+    internal_fatal(page->aral_lock.next && page->aral_lock.next->aral_lock.free_elements && page->aral_lock.next->aral_lock.free_elements < page->aral_lock.free_elements,
                    "ARAL: '%s' item should be later in the list", ar->config.name);
 
-    internal_fatal(page != ar->aral_lock.pages && (!page->prev->aral_lock.free_elements || page->prev->aral_lock.free_elements > page->aral_lock.free_elements),
+    internal_fatal(page != ar->aral_lock.pages && (!page->aral_lock.prev->aral_lock.free_elements || page->aral_lock.prev->aral_lock.free_elements > page->aral_lock.free_elements),
                    "ARAL: '%s' item should be earlier in the list", ar->config.name);
 }
 
@@ -551,8 +605,8 @@ static inline void aral_move_page_with_free_list___aral_lock_needed(ARAL *ar, AR
         // we are the first already
         return;
 
-    if(likely(!ar->config.defragment)) {
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+    if(likely(!(ar->config.options & ARAL_DEFRAGMENT))) {
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
         aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page);
     }
     else
@@ -566,18 +620,18 @@ void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITI
     ARAL_PAGE *page = aral_ptr_to_page___must_NOT_have_aral_lock(ar, ptr);
 
     if(unlikely(ar->config.mmap.enabled))
-        __atomic_sub_fetch(&aral_globals.atomic.mmap.used, ar->config.element_size, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
     else
-        __atomic_sub_fetch(&aral_globals.atomic.malloc.used, ar->config.element_size, __ATOMIC_RELAXED);
+        __atomic_sub_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED);
 
     // make this element available
     ARAL_FREE *fr = (ARAL_FREE *)ptr;
     fr->size = ar->config.element_size;
 
-    netdata_spinlock_lock(&page->free.spinlock);
+    aral_page_free_lock(ar, page);
     fr->next = page->free.list;
     page->free.list = fr;
-    netdata_spinlock_unlock(&page->free.spinlock);
+    aral_page_free_unlock(ar, page);
 
     aral_lock(ar);
 
@@ -603,9 +657,15 @@ void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITI
 
     // if the page is empty, release it
     if(unlikely(!page->aral_lock.used_elements)) {
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+        bool is_this_page_the_last_one = ar->aral_lock.pages == page && !page->aral_lock.next;
+
+        if(!is_this_page_the_last_one)
+            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
+
         aral_unlock(ar);
-        aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
+
+        if(!is_this_page_the_last_one)
+            aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
     }
     else {
         aral_move_page_with_free_list___aral_lock_needed(ar, page);
@@ -618,27 +678,44 @@ void aral_destroy_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS
 
     ARAL_PAGE *page;
     while((page = ar->aral_lock.pages)) {
-        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, prev, next);
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next);
         aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS);
     }
 
     aral_unlock(ar);
+
+    if(ar->config.options & ARAL_ALLOCATED_STATS)
+        freez(ar->stats);
+
     freez(ar);
 }
 
-ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_elements, const char *filename, char **cache_dir, bool mmap, bool lockless) {
+size_t aral_element_size(ARAL *ar) {
+    return ar->config.requested_element_size;
+}
+
+ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size,
+                  struct aral_statistics *stats, const char *filename, char **cache_dir, bool mmap, bool lockless) {
     ARAL *ar = callocz(1, sizeof(ARAL));
+    ar->config.options = (lockless) ? ARAL_LOCKLESS : 0;
     ar->config.requested_element_size = element_size;
     ar->config.initial_page_elements = initial_page_elements;
-    ar->config.max_page_elements = max_page_elements;
+    ar->config.requested_max_page_size = max_page_size;
     ar->config.mmap.filename = filename;
     ar->config.mmap.cache_dir = cache_dir;
     ar->config.mmap.enabled = mmap;
-    ar->config.lockless = lockless;
-    ar->config.defragment = false;
     strncpyz(ar->config.name, name, ARAL_MAX_NAME);
     netdata_spinlock_init(&ar->aral_lock.spinlock);
 
+    if(stats) {
+        ar->stats = stats;
+        ar->config.options &= ~ARAL_ALLOCATED_STATS;
+    }
+    else {
+        ar->stats = callocz(1, sizeof(struct aral_statistics));
+        ar->config.options |= ARAL_ALLOCATED_STATS;
    }
+
     long int page_size = sysconf(_SC_PAGE_SIZE);
     if (unlikely(page_size == -1))
         ar->config.natural_page_size = 4096;
@@ -659,6 +736,8 @@ ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_ele
     // and finally align it to the natural alignment
     ar->config.element_size = natural_alignment(ar->config.element_size, ARAL_NATURAL_ALIGNMENT);
 
+    ar->config.max_page_elements = ar->config.requested_max_page_size / ar->config.element_size;
+
     // we write the page pointer just after each element
     ar->config.page_ptr_offset = ar->config.element_size - sizeof(uintptr_t);
 
@@ -709,20 +788,106 @@ ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_ele
          "ARAL: '%s' "
          "element size %zu (requested %zu bytes), "
          "min elements per page %zu (requested %zu), "
-         "max elements per page %zu (requested %zu), "
-         "max page size %zu bytes, "
+         "max elements per page %zu, "
+         "max page size %zu bytes (requested %zu) "
         , ar->config.name
         , ar->config.element_size, ar->config.requested_element_size
        , ar->adders.allocation_size / ar->config.element_size, ar->config.initial_page_elements
-        , ar->config.max_allocation_size / ar->config.element_size, ar->config.max_page_elements
-        , ar->config.max_allocation_size
+        , ar->config.max_allocation_size / ar->config.element_size
+        , ar->config.max_allocation_size, ar->config.requested_max_page_size
    );
 
-    __atomic_add_fetch(&aral_globals.atomic.structures.allocations, 1, __ATOMIC_RELAXED);
-    __atomic_add_fetch(&aral_globals.atomic.structures.allocated, sizeof(ARAL), __ATOMIC_RELAXED);
+    __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED);
+    __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL), __ATOMIC_RELAXED);
+
+    return ar;
+}
+
+// ----------------------------------------------------------------------------
+// global aral caching
+
+#define ARAL_BY_SIZE_MAX_SIZE 1024
+
+struct aral_by_size {
+    ARAL *ar;
+    int32_t refcount;
+};
+
+struct {
+    struct aral_statistics shared_statistics;
+    SPINLOCK spinlock;
+    struct aral_by_size array[ARAL_BY_SIZE_MAX_SIZE + 1];
+} aral_by_size_globals = {};
+
+struct aral_statistics *aral_by_size_statistics(void) {
+    return &aral_by_size_globals.shared_statistics;
+}
+
+size_t aral_by_size_structures(void) {
+    return aral_structures_from_stats(&aral_by_size_globals.shared_statistics);
+}
+
+size_t aral_by_size_overhead(void) {
+    return aral_overhead_from_stats(&aral_by_size_globals.shared_statistics);
+}
+
+ARAL *aral_by_size_acquire(size_t size) {
+    netdata_spinlock_lock(&aral_by_size_globals.spinlock);
+
+    ARAL *ar = NULL;
+
+    if(size <= ARAL_BY_SIZE_MAX_SIZE && aral_by_size_globals.array[size].ar) {
+        ar = aral_by_size_globals.array[size].ar;
+        aral_by_size_globals.array[size].refcount++;
+
+        internal_fatal(aral_element_size(ar) != size, "DICTIONARY: aral has size %zu but we want %zu",
+                       aral_element_size(ar), size);
+    }
+
+    if(!ar) {
+        char buf[30 + 1];
+        snprintf(buf, 30, "size-%zu", size);
+        ar = aral_create(buf,
+                         size,
+                         0,
+                         65536 * ((size / 150) + 1),
+                         &aral_by_size_globals.shared_statistics,
+                         NULL, NULL, false, false);
+
+        if(size <= ARAL_BY_SIZE_MAX_SIZE) {
+            aral_by_size_globals.array[size].ar = ar;
+            aral_by_size_globals.array[size].refcount = 1;
+        }
+    }
+
+    netdata_spinlock_unlock(&aral_by_size_globals.spinlock);
+
+    return ar;
 }
 
+void aral_by_size_release(ARAL *ar) {
+    size_t size = aral_element_size(ar);
+
+    if(size <= ARAL_BY_SIZE_MAX_SIZE) {
+        netdata_spinlock_lock(&aral_by_size_globals.spinlock);
+
+        internal_fatal(aral_by_size_globals.array[size].ar != ar,
+                       "ARAL BY SIZE: aral pointers do not match");
+
+        if(aral_by_size_globals.array[size].refcount <= 0)
+            fatal("ARAL BY SIZE: double release detected");
+
+        aral_by_size_globals.array[size].refcount--;
+        if(!aral_by_size_globals.array[size].refcount) {
+            aral_destroy(aral_by_size_globals.array[size].ar);
+            aral_by_size_globals.array[size].ar = NULL;
+        }
+
+        netdata_spinlock_unlock(&aral_by_size_globals.spinlock);
+    }
+    else
+        aral_destroy(ar);
+}
+
 // ----------------------------------------------------------------------------
 // unittest
 
@@ -774,7 +939,7 @@ static void *aral_test_thread(void *ptr) {
             pointers[i] = NULL;
         }
 
-        if (auc->single_threaded && ar->aral_lock.pages) {
+        if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) {
            fprintf(stderr, "\n\nARAL leftovers detected (1)\n\n");
             __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
         }
@@ -789,7 +954,7 @@ static void *aral_test_thread(void *ptr) {
 
         size_t increment = elements / ar->config.max_page_elements;
         for (size_t all = increment; all <= elements / 2; all += increment) {
-            size_t to_free = all % ar->config.max_page_elements;
+            size_t to_free = (all % ar->config.max_page_elements) + 1;
             size_t step = elements / to_free;
             if(!step) step = 1;
@@ -814,7 +979,7 @@ static void *aral_test_thread(void *ptr) {
             pointers[i] = NULL;
         }
 
-        if (auc->single_threaded && ar->aral_lock.pages) {
+        if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) {
            fprintf(stderr, "\n\nARAL leftovers detected (2)\n\n");
            __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED);
        }
@@ -830,12 +995,10 @@ int aral_stress_test(size_t threads, size_t elements, size_t seconds) {
     fprintf(stderr, "Running stress test of %zu threads, with %zu elements each, for %zu seconds...\n",
             threads, elements, seconds);
 
-    memset(&aral_globals, 0, sizeof(aral_globals));
-
     struct aral_unittest_config auc = {
             .single_threaded = false,
             .threads = threads,
-            .ar = aral_create("aral-test", 20, 10, 1024, "test-aral", NULL, false, false),
+            .ar = aral_create("aral-stress-test", 20, 0, 8192, NULL, "aral-stress-test", NULL, false, false),
             .elements = elements,
            .errors = 0,
    };
@@ -880,7 +1043,7 @@ int aral_stress_test(size_t threads, size_t elements, size_t seconds) {
 
     usec_t ended_ut = now_monotonic_usec();
 
-    if (auc.ar->aral_lock.pages) {
+    if (auc.ar->aral_lock.pages && auc.ar->aral_lock.pages->aral_lock.used_elements) {
        fprintf(stderr, "\n\nARAL leftovers detected (3)\n\n");
        __atomic_add_fetch(&auc.errors, 1, __ATOMIC_RELAXED);
    }
@@ -903,7 +1066,7 @@ int aral_unittest(size_t elements) {
     struct aral_unittest_config auc = {
            .single_threaded = true,
            .threads = 1,
-            .ar = aral_create("aral-test", 20, 10, 1024, "test-aral", &cache_dir, false, false),
+            .ar = aral_create("aral-test", 20, 0, 8192, NULL, "aral-test", &cache_dir, false, false),
            .elements = elements,
            .errors = 0,
    };
diff --git a/libnetdata/aral/aral.h b/libnetdata/aral/aral.h
index 4b1c22f9a9..96f5a9c44f 100644
--- a/libnetdata/aral/aral.h
+++ b/libnetdata/aral/aral.h
@@ -8,9 +8,41 @@
 
 typedef struct aral ARAL;
 
-ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_elements, const char *filename, char **cache_dir, bool mmap, bool lockless);
+struct aral_statistics {
+    struct {
+        size_t allocations;
+        size_t allocated_bytes;
+    } structures;
+
+    struct {
+        size_t allocations;
+        size_t allocated_bytes;
+        size_t used_bytes;
+    } malloc;
+
+    struct {
+        size_t allocations;
+        size_t allocated_bytes;
+        size_t used_bytes;
+    } mmap;
+};
+
+ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size,
+                  struct aral_statistics *stats, const char *filename, char **cache_dir, bool mmap, bool lockless);
+size_t aral_element_size(ARAL *ar);
+size_t aral_overhead(ARAL *ar);
+size_t aral_structures(ARAL *ar);
+struct aral_statistics *aral_statistics(ARAL *ar);
+size_t aral_structures_from_stats(struct aral_statistics *stats);
+size_t aral_overhead_from_stats(struct aral_statistics *stats);
```
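
With the `struct aral_statistics` layout now public in `aral.h`, overhead is simply allocated bytes minus used bytes, and the structures counters track the bookkeeping (the ARAL itself and its page headers). A short sketch of reading the shared per-size statistics through the helpers this diff introduces:

```c
#include <stdio.h>

// Reads the statistics block shared by all aral-by-size arrays.
void report_aral_by_size(void) {
    struct aral_statistics *stats = aral_by_size_statistics();

    size_t structures = aral_structures_from_stats(stats); // bytes spent on ARAL/page bookkeeping
    size_t overhead   = aral_overhead_from_stats(stats);   // malloc'd element bytes not currently in use

    fprintf(stderr, "aral-by-size: %zu bytes of structures, %zu bytes of overhead\n",
            structures, overhead);
}
```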