diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2022-11-22 22:41:50 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-22 22:41:50 +0200 |
commit | 77a304f52e4c6aadef0eac06b4869b7e1c829175 (patch) | |
tree | 5e2141c0acc55321edaf241b794c022ec1d6d244 | |
parent | 2d5f3acf71f0c759056a3269987fee484566bc4c (diff) |
improve performance of worker utilization statistics (#14034)
-rw-r--r-- | daemon/global_statistics.c | 78 | ||||
-rw-r--r-- | libnetdata/libnetdata.h | 1 | ||||
-rw-r--r-- | libnetdata/worker_utilization/worker_utilization.c | 59 | ||||
-rw-r--r-- | libnetdata/worker_utilization/worker_utilization.h | 1 |
4 files changed, 88 insertions, 51 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 21c4c8d005..10a3b2a430 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -1697,9 +1697,9 @@ struct worker_job_type_gs { struct worker_thread { pid_t pid; - int enabled; + bool enabled; - int cpu_enabled; + bool cpu_enabled; double cpu; kernel_uint_t utime; @@ -1715,6 +1715,7 @@ struct worker_thread { usec_t busy_time; struct worker_thread *next; + struct worker_thread *prev; }; struct worker_utilization { @@ -1727,6 +1728,7 @@ struct worker_utilization { struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + size_t workers_max_job_id; size_t workers_registered; size_t workers_busy; usec_t workers_total_busy_time; @@ -1975,7 +1977,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { { size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + for(i = 0; i <= wu->workers_max_job_id ;i++) { if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) continue; @@ -2018,7 +2020,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { { size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + for(i = 0; i <= wu->workers_max_job_id ;i++) { if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) continue; @@ -2073,7 +2075,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { { size_t i; - for (i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES; i++) { + for (i = 0; i <= wu->workers_max_job_id; i++) { if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE) continue; @@ -2129,7 +2131,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { { size_t i; - for (i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES; i++) { + for (i = 0; i <= wu->workers_max_job_id ; i++) { if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL) continue; @@ -2214,8 +2216,8 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu) struct worker_thread *wt; for(wt = wu->threads; wt ; wt = wt->next) { - wt->enabled = 0; - wt->cpu_enabled = 0; + wt->enabled = false; + wt->cpu_enabled = false; } } @@ -2243,31 +2245,30 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_ #endif } +static Pvoid_t workers_by_pid_JudyL_array = NULL; + static void workers_threads_cleanup(struct worker_utilization *wu) { - struct worker_thread *t; - - // free threads at the beginning of the linked list - while(wu->threads && !wu->threads->enabled) { - t = wu->threads; - wu->threads = t->next; - t->next = NULL; - freez(t); + struct worker_thread *t = wu->threads; + while(t) { + struct worker_thread *next = t->next; + + if(!t->enabled) { + JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0); + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next); + freez(t); + } + + t = next; } + } - // free threads in the middle of the linked list - for(t = wu->threads; t && t->next ; t = t->next) { - if(t->next->enabled) continue; +static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) { + struct worker_thread *wt = NULL; - struct worker_thread *to_remove = t->next; - t->next = to_remove->next; - to_remove->next = NULL; - freez(to_remove); - } -} + Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0); + if(PValue) + wt = *PValue; -static struct worker_thread *worker_thread_find(struct worker_utilization *wu, pid_t pid) { - struct worker_thread *wt; - for(wt = wu->threads; wt && wt->pid != pid ; wt = wt->next) ; return wt; } @@ -2277,9 +2278,11 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu, wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread)); wt->pid = pid; + Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0); + *PValue = wt; + // link it - wt->next = wu->threads; - wu->threads = wt; + DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next); return wt; } @@ -2295,6 +2298,7 @@ static struct worker_thread *worker_thread_find_or_create(struct worker_utilizat static void worker_utilization_charts_callback(void *ptr , pid_t pid __maybe_unused , const char *thread_tag __maybe_unused + , size_t max_job_id __maybe_unused , size_t utilization_usec __maybe_unused , size_t duration_usec __maybe_unused , size_t jobs_started __maybe_unused @@ -2311,7 +2315,7 @@ static void worker_utilization_charts_callback(void *ptr // find the worker_thread in the list struct worker_thread *wt = worker_thread_find_or_create(wu, pid); - wt->enabled = 1; + wt->enabled = true; wt->busy_time = utilization_usec; wt->jobs_started = jobs_started; @@ -2319,6 +2323,9 @@ static void worker_utilization_charts_callback(void *ptr wt->stime_old = wt->stime; wt->collected_time_old = wt->collected_time; + if(max_job_id > wu->workers_max_job_id) + wu->workers_max_job_id = max_job_id; + wu->workers_total_busy_time += utilization_usec; wu->workers_total_duration += duration_usec; wu->workers_total_jobs_started += jobs_started; @@ -2334,7 +2341,7 @@ static void worker_utilization_charts_callback(void *ptr // accumulate per job type statistics size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + for(i = 0; i <= max_job_id ;i++) { if(!wu->per_job_type[i].name && job_types_names[i]) wu->per_job_type[i].name = string_dup(job_types_names[i]); @@ -2372,13 +2379,13 @@ static void worker_utilization_charts_callback(void *ptr double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; double cpu = utime + stime; wt->cpu = cpu; - wt->cpu_enabled = 1; + wt->cpu_enabled = true; wu->workers_cpu_total += cpu; if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu; if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu; } - wu->workers_cpu_registered += wt->cpu_enabled; + wu->workers_cpu_registered += (wt->cpu_enabled) ? 1 : 0; } static void worker_utilization_charts(void) { @@ -2420,7 +2427,8 @@ static void worker_utilization_finish(void) { // mark all threads as not enabled struct worker_thread *t; - for(t = wu->threads; t ; t = t->next) t->enabled = 0; + for(t = wu->threads; t ; t = t->next) + t->enabled = false; // let the cleanup job free them workers_threads_cleanup(wu); diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h index ce2f8682b0..58eaa9dede 100644 --- a/libnetdata/libnetdata.h +++ b/libnetdata/libnetdata.h @@ -421,6 +421,7 @@ bool run_command_and_copy_output_to_stdout(const char *command, int max_line_len void netdata_cleanup_and_exit(int ret) NORETURN; void send_statistics(const char *action, const char *action_result, const char *action_data); extern char *netdata_configured_host_prefix; +#include "libjudy/src/Judy.h" #include "os.h" #include "storage_number/storage_number.h" #include "threads/threads.h" diff --git a/libnetdata/worker_utilization/worker_utilization.c b/libnetdata/worker_utilization/worker_utilization.c index 114c4ad9f8..700f88a736 100644 --- a/libnetdata/worker_utilization/worker_utilization.c +++ b/libnetdata/worker_utilization/worker_utilization.c @@ -24,7 +24,6 @@ struct worker { pid_t pid; const char *tag; const char *workname; - uint32_t workname_hash; // statistics controlled variables volatile usec_t statistics_last_checkpoint; @@ -32,6 +31,7 @@ struct worker { usec_t statistics_last_busy_time; // the worker controlled variables + size_t worker_max_job_id; volatile size_t job_id; volatile size_t jobs_started; volatile usec_t busy_time; @@ -44,9 +44,9 @@ struct worker { struct worker *prev; }; -static netdata_mutex_t base_lock = NETDATA_MUTEX_INITIALIZER; -static struct worker *base = NULL; +static netdata_mutex_t workers_base_lock = NETDATA_MUTEX_INITIALIZER; static __thread struct worker *worker = NULL; +static Pvoid_t workers_per_workname_JudyHS_array = NULL; void worker_register(const char *workname) { if(unlikely(worker)) return; @@ -55,16 +55,24 @@ void worker_register(const char *workname) { worker->pid = gettid(); worker->tag = strdupz(netdata_thread_tag()); worker->workname = strdupz(workname); - worker->workname_hash = simple_hash(worker->workname); usec_t now = now_realtime_usec(); worker->statistics_last_checkpoint = now; worker->last_action_timestamp = now; worker->last_action = WORKER_IDLE; - netdata_mutex_lock(&base_lock); - DOUBLE_LINKED_LIST_PREPEND_UNSAFE(base, worker, prev, next); - netdata_mutex_unlock(&base_lock); + size_t workname_size = strlen(workname) + 1; + netdata_mutex_lock(&workers_base_lock); + + Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)workname, workname_size); + if(!PValue) + PValue = JudyHSIns(&workers_per_workname_JudyHS_array, (void *)workname, workname_size, PJE0); + + struct worker *base = *PValue; + DOUBLE_LINKED_LIST_APPEND_UNSAFE(base, worker, prev, next); + *PValue = base; + + netdata_mutex_unlock(&workers_base_lock); } void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type) { @@ -74,6 +82,10 @@ void worker_register_job_custom_metric(size_t job_id, const char *name, const ch error("WORKER_UTILIZATION: job_id %zu is too big. Max is %zu", job_id, (size_t)(WORKER_UTILIZATION_MAX_JOB_TYPES - 1)); return; } + + if(job_id > worker->worker_max_job_id) + worker->worker_max_job_id = job_id; + if(worker->per_job_type[job_id].name) { if(strcmp(string2str(worker->per_job_type[job_id].name), name) != 0 || worker->per_job_type[job_id].type != type || strcmp(string2str(worker->per_job_type[job_id].units), units) != 0) error("WORKER_UTILIZATION: duplicate job registration: worker '%s' job id %zu is '%s', ignoring the later '%s'", worker->workname, job_id, string2str(worker->per_job_type[job_id].name), name); @@ -92,9 +104,18 @@ void worker_register_job_name(size_t job_id, const char *name) { void worker_unregister(void) { if(unlikely(!worker)) return; - netdata_mutex_lock(&base_lock); - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(base, worker, prev, next); - netdata_mutex_unlock(&base_lock); + size_t workname_size = strlen(worker->workname) + 1; + netdata_mutex_lock(&workers_base_lock); + Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)worker->workname, workname_size); + if(PValue) { + struct worker *base = *PValue; + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(base, worker, prev, next); + *PValue = base; + + if(!base) + JudyHSDel(&workers_per_workname_JudyHS_array, (void *)worker->workname, workname_size, PJE0); + } + netdata_mutex_unlock(&workers_base_lock); for(int i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { string_freez(worker->per_job_type[i].name); @@ -170,6 +191,7 @@ void workers_foreach(const char *workname, void (*callback)( void *data , pid_t pid , const char *thread_tag + , size_t max_job_id , size_t utilization_usec , size_t duration_usec , size_t jobs_started, size_t is_running @@ -181,15 +203,18 @@ void workers_foreach(const char *workname, void (*callback)( , NETDATA_DOUBLE *job_custom_values ) , void *data) { - netdata_mutex_lock(&base_lock); - uint32_t hash = simple_hash(workname); + netdata_mutex_lock(&workers_base_lock); usec_t busy_time, delta; size_t i, jobs_started, jobs_running; + size_t workname_size = strlen(workname) + 1; + struct worker *base = NULL; + Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)workname, workname_size); + if(PValue) + base = *PValue; + struct worker *p; DOUBLE_LINKED_LIST_FOREACH_FORWARD(base, p, prev, next) { - if(hash != p->workname_hash || strcmp(workname, p->workname) != 0) continue; - usec_t now = now_realtime_usec(); // find per job type statistics @@ -200,7 +225,8 @@ void workers_foreach(const char *workname, void (*callback)( usec_t per_job_type_busy_time[WORKER_UTILIZATION_MAX_JOB_TYPES]; NETDATA_DOUBLE per_job_custom_values[WORKER_UTILIZATION_MAX_JOB_TYPES]; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + size_t max_job_id = p->worker_max_job_id; + for(i = 0; i <= max_job_id ;i++) { per_job_type_name[i] = p->per_job_type[i].name; per_job_type_units[i] = p->per_job_type[i].units; per_job_metric_type[i] = p->per_job_type[i].type; @@ -286,6 +312,7 @@ void workers_foreach(const char *workname, void (*callback)( callback(data , p->pid , p->tag + , max_job_id , busy_time , delta , jobs_started @@ -299,5 +326,5 @@ void workers_foreach(const char *workname, void (*callback)( ); } - netdata_mutex_unlock(&base_lock); + netdata_mutex_unlock(&workers_base_lock); } diff --git a/libnetdata/worker_utilization/worker_utilization.h b/libnetdata/worker_utilization/worker_utilization.h index 954fcdcba7..04d24f1f73 100644 --- a/libnetdata/worker_utilization/worker_utilization.h +++ b/libnetdata/worker_utilization/worker_utilization.h @@ -30,6 +30,7 @@ void workers_foreach(const char *workname, void (*callback)( void *data , pid_t pid , const char *thread_tag + , size_t max_job_id , size_t utilization_usec , size_t duration_usec , size_t jobs_started |