summary | refs | log | tree | commit | diff | stats
path: root/daemon
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2022-10-09 21:58:21 +0300
committer GitHub <noreply@github.com> 2022-10-09 21:58:21 +0300
commit 758d9c405d2d768a3c125052a02c7a1503b01bd8 (patch)
tree de24c46008c9c7bf95270ebb8f3b117229c43db9 /daemon
parent 067305602f373d12286e492143bf6cb2a32ffe31 (diff)
full memory tracking and profiling of Netdata Agent (#13789)
* full memory tracking and profiling of Netdata Agent
* initialize dbengine only when it is needed
* handling of dbengine compiled but not available
* restore unittest
* restore unittest again
* more improvements about ifdef dbengine
* fix compilation when dbengine is not enabled
* check if dbengine is enabled on exit
* call freez() not free()
* aral unittest
* internal checks activate trace allocations; dev mode activates internal checks
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/global_statistics.c 162
-rw-r--r-- daemon/main.c 20
-rw-r--r-- daemon/service.c 2
3 files changed, 174 insertions, 10 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index 1d4da897a5..79e0bb7711 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -11,8 +11,9 @@
#define WORKER_JOB_HEARTBEAT 4
#define WORKER_JOB_STRINGS 5
#define WORKER_JOB_DICTIONARIES 6
+#define WORKER_JOB_MALLOC_TRACE 7
-#if WORKER_UTILIZATION_MAX_JOB_TYPES < 7
+#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 5
#endif
@@ -1571,6 +1572,153 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) {
}
}
+#ifdef NETDATA_TRACE_ALLOCATIONS
+
+struct memory_trace_data { // the four charts updated by do_memory_trace_item(); each traced function becomes one dimension per chart
+ RRDSET *st_memory; // chart "netdata.memory.size": memory used by function, in bytes
+ RRDSET *st_allocations; // chart "netdata.memory.allocations": allocations by function
+ RRDSET *st_avg_alloc; // chart "netdata.memory.avg_alloc": average allocation size by function, in bytes
+ RRDSET *st_ops; // chart "netdata.memory.operations": memory operations by function, in ops/s
+};
+
+static int do_memory_trace_item(void *item, void *data) { // callback passed to malloc_trace_walkthrough(): publishes one trace entry to the four charts
+ struct memory_trace_data *tmp = data; // the chart set supplied by malloc_trace_statistics()
+ struct malloc_trace *p = item; // the trace entry being visited; p->function is used as the dimension id on every chart
+
+ // memory: the entry's byte counter, charted as an absolute value
+
+ if(!p->rd_bytes) // each dimension is created on the first visit and cached inside the entry
+ p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED);
+ rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes);
+
+ // allocations: the entry's allocation counter, charted as an absolute value
+
+ if(!p->rd_allocations)
+ p->rd_allocations = rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED);
+ rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs);
+
+ // average allocation size: bytes / allocations, sent multiplied by 100
+
+ if(!p->rd_avg_alloc) // NOTE(review): the "1, 100" arguments are presumably multiplier and divisor, undoing the x100 below - confirm against rrddim_add()
+ p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+
+ collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; // integer math; 0 when there are no allocations, which also avoids dividing by zero
+ rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc);
+
+ // operations: sum of all call counters, on a dimension created with RRD_ALGORITHM_INCREMENTAL
+
+ if(!p->rd_ops)
+ p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ collected_number ops = 0;
+ ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED);
+ ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED);
+ ops += (collected_number)__atomic_load_n(&p->realloc_calls, __ATOMIC_RELAXED);
+ ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED);
+ ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED);
+ rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops);
+
+ // always returns 1; how malloc_trace_walkthrough() interprets the return value is not visible in this file section
+
+ return 1;
+}
+static void malloc_trace_statistics(void) { // creates (once) and updates the four per-function memory tracing charts
+ static struct memory_trace_data tmp = { // static: the chart pointers persist across calls
+ .st_memory = NULL,
+ .st_allocations = NULL,
+ .st_avg_alloc = NULL,
+ .st_ops = NULL,
+ };
+
+ if(!tmp.st_memory) { // chart pointer still NULL: create the "memory used by function" chart
+ tmp.st_memory = rrdset_create_localhost(
+ "netdata"
+ , "memory_size"
+ , NULL
+ , "memory"
+ , "netdata.memory.size"
+ , "Netdata Memory Used by Function"
+ , "bytes"
+ , "netdata"
+ , "stats"
+ , 900000
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_STACKED
+ );
+ }
+ else // chart already exists: rrdset_next() is called before new values are set
+ rrdset_next(tmp.st_memory);
+
+ if(!tmp.st_ops) { // same create-or-next pattern for the "memory operations by function" chart
+ tmp.st_ops = rrdset_create_localhost(
+ "netdata"
+ , "memory_operations"
+ , NULL
+ , "memory"
+ , "netdata.memory.operations"
+ , "Netdata Memory Operations by Function"
+ , "ops/s"
+ , "netdata"
+ , "stats"
+ , 900001
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+ }
+ else
+ rrdset_next(tmp.st_ops);
+
+ if(!tmp.st_allocations) { // same pattern for the "allocations by function" chart
+ tmp.st_allocations = rrdset_create_localhost(
+ "netdata"
+ , "memory_allocations"
+ , NULL
+ , "memory"
+ , "netdata.memory.allocations"
+ , "Netdata Memory Allocations by Function"
+ , "allocations"
+ , "netdata"
+ , "stats"
+ , 900002
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_STACKED
+ );
+ }
+ else
+ rrdset_next(tmp.st_allocations);
+
+ if(!tmp.st_avg_alloc) { // same pattern for the "average allocation size by function" chart
+ tmp.st_avg_alloc = rrdset_create_localhost(
+ "netdata"
+ , "memory_avg_alloc"
+ , NULL
+ , "memory"
+ , "netdata.memory.avg_alloc"
+ , "Netdata Average Allocation Size by Function"
+ , "bytes"
+ , "netdata"
+ , "stats"
+ , 900003
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+ }
+ else
+ rrdset_next(tmp.st_avg_alloc);
+
+ malloc_trace_walkthrough(do_memory_trace_item, &tmp); // do_memory_trace_item() sets the dimension values on all four charts
+
+ rrdset_done(tmp.st_memory); // rrdset_done() is called on every chart only after the walkthrough returns
+ rrdset_done(tmp.st_ops);
+ rrdset_done(tmp.st_allocations);
+ rrdset_done(tmp.st_avg_alloc);
+}
+#endif
+
static void dictionary_statistics(void) {
for(int i = 0; dictionary_categories[i].stats ;i++) {
update_dictionary_category_charts(&dictionary_categories[i]);
@@ -2375,6 +2523,7 @@ void *global_statistics_main(void *ptr)
worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine");
worker_register_job_name(WORKER_JOB_STRINGS, "strings");
worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries");
+ worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace");
netdata_thread_cleanup_push(global_statistics_cleanup, ptr);
@@ -2404,8 +2553,10 @@ void *global_statistics_main(void *ptr)
worker_is_busy(WORKER_JOB_REGISTRY);
registry_statistics();
- worker_is_busy(WORKER_JOB_DBENGINE);
- dbengine_statistics_charts();
+ if(dbengine_enabled) {
+ worker_is_busy(WORKER_JOB_DBENGINE);
+ dbengine_statistics_charts();
+ }
worker_is_busy(WORKER_JOB_HEARTBEAT);
update_heartbeat_charts();
@@ -2415,6 +2566,11 @@ void *global_statistics_main(void *ptr)
worker_is_busy(WORKER_JOB_DICTIONARIES);
dictionary_statistics();
+
+#ifdef NETDATA_TRACE_ALLOCATIONS
+ worker_is_busy(WORKER_JOB_MALLOC_TRACE);
+ malloc_trace_statistics();
+#endif
}
netdata_thread_cleanup_pop(1);
diff --git a/daemon/main.c b/daemon/main.c
index 20b6068c87..8f3477ad97 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -55,13 +55,17 @@ void netdata_cleanup_and_exit(int ret) {
// free the database
info("EXIT: freeing database memory...");
#ifdef ENABLE_DBENGINE
- for(int tier = 0; tier < storage_tiers ; tier++)
- rrdeng_prepare_exit(multidb_ctx[tier]);
+ if(dbengine_enabled) {
+ for (int tier = 0; tier < storage_tiers; tier++)
+ rrdeng_prepare_exit(multidb_ctx[tier]);
+ }
#endif
rrdhost_free_all();
#ifdef ENABLE_DBENGINE
- for(int tier = 0; tier < storage_tiers ; tier++)
- rrdeng_exit(multidb_ctx[tier]);
+ if(dbengine_enabled) {
+ for (int tier = 0; tier < storage_tiers; tier++)
+ rrdeng_exit(multidb_ctx[tier]);
+ }
#endif
}
sql_close_context_database();
@@ -255,7 +259,8 @@ void cancel_main_threads() {
for (i = 0; static_threads[i].name != NULL ; i++)
freez(static_threads[i].thread);
- free(static_threads);
+
+ freez(static_threads);
}
struct option_def option_definitions[] = {
@@ -1001,6 +1006,8 @@ int main(int argc, char **argv) {
if(string_unittest(10000)) return 1;
if (dictionary_unittest(10000))
return 1;
+ if(aral_unittest(10000))
+ return 1;
if (rrdlabels_unittest())
return 1;
if (ctx_unittest())
@@ -1023,6 +1030,9 @@ int main(int argc, char **argv) {
else if(strcmp(optarg, "dicttest") == 0) {
return dictionary_unittest(10000);
}
+ else if(strcmp(optarg, "araltest") == 0) {
+ return aral_unittest(10000);
+ }
else if(strcmp(optarg, "stringtest") == 0) {
return string_unittest(10000);
}
diff --git a/daemon/service.c b/daemon/service.c
index 71b377dbba..3a267402d1 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -216,10 +216,8 @@ restart_after_removal:
info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
-#ifdef ENABLE_DBENGINE
/* don't delete multi-host DB host files */
&& !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->storage_instance[0]))
-#endif
) {
worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
rrdhost_delete_charts(host);