diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2022-10-09 21:58:21 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-10-09 21:58:21 +0300 |
commit | 758d9c405d2d768a3c125052a02c7a1503b01bd8 (patch) | |
tree | de24c46008c9c7bf95270ebb8f3b117229c43db9 /daemon | |
parent | 067305602f373d12286e492143bf6cb2a32ffe31 (diff) |
full memory tracking and profiling of Netdata Agent (#13789)
* full memory tracking and profiling of Netdata Agent
* initialize dbengine only when it is needed
* handle the case where dbengine is compiled in but not available
* restore unittest
* restore unittest again
* more improvements to the dbengine #ifdef handling
* fix compilation when dbengine is not enabled
* check if dbengine is enabled on exit
* call freez() not free()
* aral unittest
* internal checks activate trace allocations; dev mode activates internal checks
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/global_statistics.c | 162 | ||||
-rw-r--r-- | daemon/main.c | 20 | ||||
-rw-r--r-- | daemon/service.c | 2 |
3 files changed, 174 insertions, 10 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 1d4da897a5..79e0bb7711 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -11,8 +11,9 @@ #define WORKER_JOB_HEARTBEAT 4 #define WORKER_JOB_STRINGS 5 #define WORKER_JOB_DICTIONARIES 6 +#define WORKER_JOB_MALLOC_TRACE 7 -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 7 +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8 #error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 5 #endif @@ -1571,6 +1572,153 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) { } } +#ifdef NETDATA_TRACE_ALLOCATIONS + +struct memory_trace_data { + RRDSET *st_memory; + RRDSET *st_allocations; + RRDSET *st_avg_alloc; + RRDSET *st_ops; +}; + +static int do_memory_trace_item(void *item, void *data) { + struct memory_trace_data *tmp = data; + struct malloc_trace *p = item; + + // ------------------------------------------------------------------------ + + if(!p->rd_bytes) + p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes); + + // ------------------------------------------------------------------------ + + if(!p->rd_allocations) + p->rd_allocations = rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs); + + // ------------------------------------------------------------------------ + + if(!p->rd_avg_alloc) + p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; + rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc); + + // 
------------------------------------------------------------------------ + + if(!p->rd_ops) + p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + collected_number ops = 0; + ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->realloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops); + + // ------------------------------------------------------------------------ + + return 1; +} +static void malloc_trace_statistics(void) { + static struct memory_trace_data tmp = { + .st_memory = NULL, + .st_allocations = NULL, + .st_avg_alloc = NULL, + .st_ops = NULL, + }; + + if(!tmp.st_memory) { + tmp.st_memory = rrdset_create_localhost( + "netdata" + , "memory_size" + , NULL + , "memory" + , "netdata.memory.size" + , "Netdata Memory Used by Function" + , "bytes" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + else + rrdset_next(tmp.st_memory); + + if(!tmp.st_ops) { + tmp.st_ops = rrdset_create_localhost( + "netdata" + , "memory_operations" + , NULL + , "memory" + , "netdata.memory.operations" + , "Netdata Memory Operations by Function" + , "ops/s" + , "netdata" + , "stats" + , 900001 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + else + rrdset_next(tmp.st_ops); + + if(!tmp.st_allocations) { + tmp.st_allocations = rrdset_create_localhost( + "netdata" + , "memory_allocations" + , NULL + , "memory" + , "netdata.memory.allocations" + , "Netdata Memory Allocations by Function" + , "allocations" + , "netdata" + , "stats" + , 900002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + else + rrdset_next(tmp.st_allocations); + 
+ if(!tmp.st_avg_alloc) { + tmp.st_avg_alloc = rrdset_create_localhost( + "netdata" + , "memory_avg_alloc" + , NULL + , "memory" + , "netdata.memory.avg_alloc" + , "Netdata Average Allocation Size by Function" + , "bytes" + , "netdata" + , "stats" + , 900003 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + else + rrdset_next(tmp.st_avg_alloc); + + malloc_trace_walkthrough(do_memory_trace_item, &tmp); + + rrdset_done(tmp.st_memory); + rrdset_done(tmp.st_ops); + rrdset_done(tmp.st_allocations); + rrdset_done(tmp.st_avg_alloc); +} +#endif + static void dictionary_statistics(void) { for(int i = 0; dictionary_categories[i].stats ;i++) { update_dictionary_category_charts(&dictionary_categories[i]); @@ -2375,6 +2523,7 @@ void *global_statistics_main(void *ptr) worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); worker_register_job_name(WORKER_JOB_STRINGS, "strings"); worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace"); netdata_thread_cleanup_push(global_statistics_cleanup, ptr); @@ -2404,8 +2553,10 @@ void *global_statistics_main(void *ptr) worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); - worker_is_busy(WORKER_JOB_DBENGINE); - dbengine_statistics_charts(); + if(dbengine_enabled) { + worker_is_busy(WORKER_JOB_DBENGINE); + dbengine_statistics_charts(); + } worker_is_busy(WORKER_JOB_HEARTBEAT); update_heartbeat_charts(); @@ -2415,6 +2566,11 @@ void *global_statistics_main(void *ptr) worker_is_busy(WORKER_JOB_DICTIONARIES); dictionary_statistics(); + +#ifdef NETDATA_TRACE_ALLOCATIONS + worker_is_busy(WORKER_JOB_MALLOC_TRACE); + malloc_trace_statistics(); +#endif } netdata_thread_cleanup_pop(1); diff --git a/daemon/main.c b/daemon/main.c index 20b6068c87..8f3477ad97 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -55,13 +55,17 @@ void netdata_cleanup_and_exit(int ret) { // free the database info("EXIT: freeing database memory..."); #ifdef ENABLE_DBENGINE - 
for(int tier = 0; tier < storage_tiers ; tier++) - rrdeng_prepare_exit(multidb_ctx[tier]); + if(dbengine_enabled) { + for (int tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + } #endif rrdhost_free_all(); #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ; tier++) - rrdeng_exit(multidb_ctx[tier]); + if(dbengine_enabled) { + for (int tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + } #endif } sql_close_context_database(); @@ -255,7 +259,8 @@ void cancel_main_threads() { for (i = 0; static_threads[i].name != NULL ; i++) freez(static_threads[i].thread); - free(static_threads); + + freez(static_threads); } struct option_def option_definitions[] = { @@ -1001,6 +1006,8 @@ int main(int argc, char **argv) { if(string_unittest(10000)) return 1; if (dictionary_unittest(10000)) return 1; + if(aral_unittest(10000)) + return 1; if (rrdlabels_unittest()) return 1; if (ctx_unittest()) @@ -1023,6 +1030,9 @@ int main(int argc, char **argv) { else if(strcmp(optarg, "dicttest") == 0) { return dictionary_unittest(10000); } + else if(strcmp(optarg, "araltest") == 0) { + return aral_unittest(10000); + } else if(strcmp(optarg, "stringtest") == 0) { return string_unittest(10000); } diff --git a/daemon/service.c b/daemon/service.c index 71b377dbba..3a267402d1 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -216,10 +216,8 @@ restart_after_removal: info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) -#ifdef ENABLE_DBENGINE /* don't delete multi-host DB host files */ && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0])) -#endif ) { worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); rrdhost_delete_charts(host); |