From dbd1624837b9afb64111602431ece417d606b921 Mon Sep 17 00:00:00 2001 From: Emmanuel Vasilakis Date: Thu, 25 Feb 2021 15:05:24 +0200 Subject: cpu stats per query thread (#10634) * cpu stats per query thread * remove dim_name * limit calls to getrusage to MAX_GETRUSAGE_CALLS_PER_TICK per tick, per thread * proper parenthesis * use proper limits --- aclk/legacy/aclk_query.c | 6 ++++++ aclk/legacy/aclk_query.h | 3 +++ aclk/legacy/aclk_stats.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ aclk/legacy/aclk_stats.h | 1 + 4 files changed, 56 insertions(+) (limited to 'aclk') diff --git a/aclk/legacy/aclk_query.c b/aclk/legacy/aclk_query.c index 7ab534f16b..1150613a14 100644 --- a/aclk/legacy/aclk_query.c +++ b/aclk/legacy/aclk_query.c @@ -629,6 +629,12 @@ static int aclk_process_query(struct aclk_query_thread *t_info) aclk_metrics_per_sample.queries_dispatched++; aclk_queries_per_thread[t_info->idx]++; ACLK_STATS_UNLOCK; + + if (likely(getrusage_called_this_tick[t_info->idx] < MAX_GETRUSAGE_CALLS_PER_TICK)) { + getrusage(RUSAGE_THREAD, &rusage_per_thread[t_info->idx]); + getrusage_called_this_tick[t_info->idx]++; + } + } aclk_query_free(this_query); diff --git a/aclk/legacy/aclk_query.h b/aclk/legacy/aclk_query.h index 53eef13922..a0a6a5c666 100644 --- a/aclk/legacy/aclk_query.h +++ b/aclk/legacy/aclk_query.h @@ -8,8 +8,11 @@ #define ACLK_STABLE_TIMEOUT 3 // Minimum delay to mark AGENT as stable +#define MAX_GETRUSAGE_CALLS_PER_TICK 5 // Maximum number of times getrusage can be called per tick, per thread. + extern pthread_cond_t query_cond_wait; extern pthread_mutex_t query_lock_wait; +extern uint8_t *getrusage_called_this_tick; #define QUERY_THREAD_WAKEUP pthread_cond_signal(&query_cond_wait) #define QUERY_THREAD_WAKEUP_ALL pthread_cond_broadcast(&query_cond_wait) diff --git a/aclk/legacy/aclk_stats.c b/aclk/legacy/aclk_stats.c index 2a57cd6f0d..da0c82bd7a 100644 --- a/aclk/legacy/aclk_stats.c +++ b/aclk/legacy/aclk_stats.c @@ -11,8 +11,17 @@ struct aclk_qt_data { RRDDIM *dim; } *aclk_qt_data = NULL; +// ACLK per query thread cpu stats +struct aclk_cpu_data { + RRDDIM *user; + RRDDIM *system; + RRDSET *st; +} *aclk_cpu_data = NULL; + uint32_t *aclk_queries_per_thread = NULL; uint32_t *aclk_queries_per_thread_sample = NULL; +struct rusage *rusage_per_thread; +uint8_t *getrusage_called_this_tick = NULL; struct aclk_metrics aclk_metrics = { .online = 0, @@ -222,11 +231,42 @@ static void aclk_stats_mat_metric_process(struct aclk_metric_mat *metric, struct rrdset_done(metric->st); } +static void aclk_stats_cpu_threads(void) +{ + char id[100 + 1]; + char title[100 + 1]; + + for (int i = 0; i < query_thread_count; i++) { + if (unlikely(!aclk_cpu_data[i].st)) { + + snprintfz(id, 100, "aclk_thread%d_cpu", i); + snprintfz(title, 100, "Cpu Usage For Thread No %d", i); + + aclk_cpu_data[i].st = rrdset_create_localhost( + "netdata", id, NULL, "aclk", NULL, title, "milliseconds/s", + "netdata", "stats", 200008 + i, localhost->rrd_update_every, RRDSET_TYPE_STACKED); + + aclk_cpu_data[i].user = rrddim_add(aclk_cpu_data[i].st, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + aclk_cpu_data[i].system = rrddim_add(aclk_cpu_data[i].st, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + + } else + rrdset_next(aclk_cpu_data[i].st); + } + + for (int i = 0; i < query_thread_count; i++) { + rrddim_set_by_pointer(aclk_cpu_data[i].st, aclk_cpu_data[i].user, rusage_per_thread[i].ru_utime.tv_sec * 1000000ULL + rusage_per_thread[i].ru_utime.tv_usec); + rrddim_set_by_pointer(aclk_cpu_data[i].st, aclk_cpu_data[i].system, rusage_per_thread[i].ru_stime.tv_sec * 1000000ULL + rusage_per_thread[i].ru_stime.tv_usec); + rrdset_done(aclk_cpu_data[i].st); + } +} + void aclk_stats_thread_cleanup() { freez(aclk_qt_data); freez(aclk_queries_per_thread); freez(aclk_queries_per_thread_sample); + freez(aclk_cpu_data); + freez(rusage_per_thread); } void *aclk_stats_main_thread(void *ptr) @@ -235,8 +275,11 @@ void *aclk_stats_main_thread(void *ptr) query_thread_count = args->query_thread_count; aclk_qt_data = callocz(query_thread_count, sizeof(struct aclk_qt_data)); + aclk_cpu_data = callocz(query_thread_count, sizeof(struct aclk_cpu_data)); aclk_queries_per_thread = callocz(query_thread_count, sizeof(uint32_t)); aclk_queries_per_thread_sample = callocz(query_thread_count, sizeof(uint32_t)); + rusage_per_thread = callocz(query_thread_count, sizeof(struct rusage)); + getrusage_called_this_tick = callocz(query_thread_count, sizeof(uint8_t)); heartbeat_t hb; heartbeat_init(&hb); @@ -264,6 +307,7 @@ void *aclk_stats_main_thread(void *ptr) memcpy(aclk_queries_per_thread_sample, aclk_queries_per_thread, sizeof(uint32_t) * query_thread_count); memset(aclk_queries_per_thread, 0, sizeof(uint32_t) * query_thread_count); + memset(getrusage_called_this_tick, 0, sizeof(uint8_t) * query_thread_count); ACLK_STATS_UNLOCK; aclk_stats_collect(&per_sample, &permanent); @@ -275,6 +319,8 @@ void *aclk_stats_main_thread(void *ptr) aclk_stats_cloud_req(&per_sample); aclk_stats_query_threads(aclk_queries_per_thread_sample); + aclk_stats_cpu_threads(); + #ifdef NETDATA_INTERNAL_CHECKS aclk_stats_mat_metric_process(&aclk_mat_metrics.latency, &per_sample.latency); #endif diff --git a/aclk/legacy/aclk_stats.h b/aclk/legacy/aclk_stats.h index 7e74fdf882..f55c4adac0 100644 --- a/aclk/legacy/aclk_stats.h +++ b/aclk/legacy/aclk_stats.h @@ -83,6 +83,7 @@ extern struct aclk_metrics_per_sample { } aclk_metrics_per_sample; extern uint32_t *aclk_queries_per_thread; +extern struct rusage *rusage_per_thread; void *aclk_stats_main_thread(void *ptr); void aclk_stats_thread_cleanup(); -- cgit v1.2.3