summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2022-11-22 22:41:50 +0200
committer GitHub <noreply@github.com> 2022-11-22 22:41:50 +0200
commit 77a304f52e4c6aadef0eac06b4869b7e1c829175 (patch)
tree 5e2141c0acc55321edaf241b794c022ec1d6d244
parent 2d5f3acf71f0c759056a3269987fee484566bc4c (diff)
improve performance of worker utilization statistics (#14034)
-rw-r--r-- daemon/global_statistics.c 78
-rw-r--r-- libnetdata/libnetdata.h 1
-rw-r--r-- libnetdata/worker_utilization/worker_utilization.c 59
-rw-r--r-- libnetdata/worker_utilization/worker_utilization.h 1
4 files changed, 88 insertions, 51 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index 21c4c8d005..10a3b2a430 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -1697,9 +1697,9 @@ struct worker_job_type_gs {
struct worker_thread {
pid_t pid;
- int enabled;
+ bool enabled;
- int cpu_enabled;
+ bool cpu_enabled;
double cpu;
kernel_uint_t utime;
@@ -1715,6 +1715,7 @@ struct worker_thread {
usec_t busy_time;
struct worker_thread *next;
+ struct worker_thread *prev;
};
struct worker_utilization {
@@ -1727,6 +1728,7 @@ struct worker_utilization {
struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES];
+ size_t workers_max_job_id;
size_t workers_registered;
size_t workers_busy;
usec_t workers_total_busy_time;
@@ -1975,7 +1977,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
{
size_t i;
- for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
+ for(i = 0; i <= wu->workers_max_job_id ;i++) {
if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY))
continue;
@@ -2018,7 +2020,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
{
size_t i;
- for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
+ for(i = 0; i <= wu->workers_max_job_id ;i++) {
if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY))
continue;
@@ -2073,7 +2075,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
{
size_t i;
- for (i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES; i++) {
+ for (i = 0; i <= wu->workers_max_job_id; i++) {
if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE)
continue;
@@ -2129,7 +2131,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
{
size_t i;
- for (i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES; i++) {
+ for (i = 0; i <= wu->workers_max_job_id ; i++) {
if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL)
continue;
@@ -2214,8 +2216,8 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu)
struct worker_thread *wt;
for(wt = wu->threads; wt ; wt = wt->next) {
- wt->enabled = 0;
- wt->cpu_enabled = 0;
+ wt->enabled = false;
+ wt->cpu_enabled = false;
}
}
@@ -2243,31 +2245,30 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_
#endif
}
+static Pvoid_t workers_by_pid_JudyL_array = NULL;
+
static void workers_threads_cleanup(struct worker_utilization *wu) {
- struct worker_thread *t;
-
- // free threads at the beginning of the linked list
- while(wu->threads && !wu->threads->enabled) {
- t = wu->threads;
- wu->threads = t->next;
- t->next = NULL;
- freez(t);
+ struct worker_thread *t = wu->threads;
+ while(t) {
+ struct worker_thread *next = t->next;
+
+ if(!t->enabled) {
+ JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0);
+ DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next);
+ freez(t);
+ }
+
+ t = next;
}
+ }
- // free threads in the middle of the linked list
- for(t = wu->threads; t && t->next ; t = t->next) {
- if(t->next->enabled) continue;
+static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) {
+ struct worker_thread *wt = NULL;
- struct worker_thread *to_remove = t->next;
- t->next = to_remove->next;
- to_remove->next = NULL;
- freez(to_remove);
- }
-}
+ Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0);
+ if(PValue)
+ wt = *PValue;
-static struct worker_thread *worker_thread_find(struct worker_utilization *wu, pid_t pid) {
- struct worker_thread *wt;
- for(wt = wu->threads; wt && wt->pid != pid ; wt = wt->next) ;
return wt;
}
@@ -2277,9 +2278,11 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu,
wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread));
wt->pid = pid;
+ Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0);
+ *PValue = wt;
+
// link it
- wt->next = wu->threads;
- wu->threads = wt;
+ DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next);
return wt;
}
@@ -2295,6 +2298,7 @@ static struct worker_thread *worker_thread_find_or_create(struct worker_utilizat
static void worker_utilization_charts_callback(void *ptr
, pid_t pid __maybe_unused
, const char *thread_tag __maybe_unused
+ , size_t max_job_id __maybe_unused
, size_t utilization_usec __maybe_unused
, size_t duration_usec __maybe_unused
, size_t jobs_started __maybe_unused
@@ -2311,7 +2315,7 @@ static void worker_utilization_charts_callback(void *ptr
// find the worker_thread in the list
struct worker_thread *wt = worker_thread_find_or_create(wu, pid);
- wt->enabled = 1;
+ wt->enabled = true;
wt->busy_time = utilization_usec;
wt->jobs_started = jobs_started;
@@ -2319,6 +2323,9 @@ static void worker_utilization_charts_callback(void *ptr
wt->stime_old = wt->stime;
wt->collected_time_old = wt->collected_time;
+ if(max_job_id > wu->workers_max_job_id)
+ wu->workers_max_job_id = max_job_id;
+
wu->workers_total_busy_time += utilization_usec;
wu->workers_total_duration += duration_usec;
wu->workers_total_jobs_started += jobs_started;
@@ -2334,7 +2341,7 @@ static void worker_utilization_charts_callback(void *ptr
// accumulate per job type statistics
size_t i;
- for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
+ for(i = 0; i <= max_job_id ;i++) {
if(!wu->per_job_type[i].name && job_types_names[i])
wu->per_job_type[i].name = string_dup(job_types_names[i]);
@@ -2372,13 +2379,13 @@ static void worker_utilization_charts_callback(void *ptr
double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta;
double cpu = utime + stime;
wt->cpu = cpu;
- wt->cpu_enabled = 1;
+ wt->cpu_enabled = true;
wu->workers_cpu_total += cpu;
if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu;
if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu;
}
- wu->workers_cpu_registered += wt->cpu_enabled;
+ wu->workers_cpu_registered += (wt->cpu_enabled) ? 1 : 0;
}
static void worker_utilization_charts(void) {
@@ -2420,7 +2427,8 @@ static void worker_utilization_finish(void) {
// mark all threads as not enabled
struct worker_thread *t;
- for(t = wu->threads; t ; t = t->next) t->enabled = 0;
+ for(t = wu->threads; t ; t = t->next)
+ t->enabled = false;
// let the cleanup job free them
workers_threads_cleanup(wu);
diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h
index ce2f8682b0..58eaa9dede 100644
--- a/libnetdata/libnetdata.h
+++ b/libnetdata/libnetdata.h
@@ -421,6 +421,7 @@ bool run_command_and_copy_output_to_stdout(const char *command, int max_line_len
void netdata_cleanup_and_exit(int ret) NORETURN;
void send_statistics(const char *action, const char *action_result, const char *action_data);
extern char *netdata_configured_host_prefix;
+#include "libjudy/src/Judy.h"
#include "os.h"
#include "storage_number/storage_number.h"
#include "threads/threads.h"
diff --git a/libnetdata/worker_utilization/worker_utilization.c b/libnetdata/worker_utilization/worker_utilization.c
index 114c4ad9f8..700f88a736 100644
--- a/libnetdata/worker_utilization/worker_utilization.c
+++ b/libnetdata/worker_utilization/worker_utilization.c
@@ -24,7 +24,6 @@ struct worker {
pid_t pid;
const char *tag;
const char *workname;
- uint32_t workname_hash;
// statistics controlled variables
volatile usec_t statistics_last_checkpoint;
@@ -32,6 +31,7 @@ struct worker {
usec_t statistics_last_busy_time;
// the worker controlled variables
+ size_t worker_max_job_id;
volatile size_t job_id;
volatile size_t jobs_started;
volatile usec_t busy_time;
@@ -44,9 +44,9 @@ struct worker {
struct worker *prev;
};
-static netdata_mutex_t base_lock = NETDATA_MUTEX_INITIALIZER;
-static struct worker *base = NULL;
+static netdata_mutex_t workers_base_lock = NETDATA_MUTEX_INITIALIZER;
static __thread struct worker *worker = NULL;
+static Pvoid_t workers_per_workname_JudyHS_array = NULL;
void worker_register(const char *workname) {
if(unlikely(worker)) return;
@@ -55,16 +55,24 @@ void worker_register(const char *workname) {
worker->pid = gettid();
worker->tag = strdupz(netdata_thread_tag());
worker->workname = strdupz(workname);
- worker->workname_hash = simple_hash(worker->workname);
usec_t now = now_realtime_usec();
worker->statistics_last_checkpoint = now;
worker->last_action_timestamp = now;
worker->last_action = WORKER_IDLE;
- netdata_mutex_lock(&base_lock);
- DOUBLE_LINKED_LIST_PREPEND_UNSAFE(base, worker, prev, next);
- netdata_mutex_unlock(&base_lock);
+ size_t workname_size = strlen(workname) + 1;
+ netdata_mutex_lock(&workers_base_lock);
+
+ Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)workname, workname_size);
+ if(!PValue)
+ PValue = JudyHSIns(&workers_per_workname_JudyHS_array, (void *)workname, workname_size, PJE0);
+
+ struct worker *base = *PValue;
+ DOUBLE_LINKED_LIST_APPEND_UNSAFE(base, worker, prev, next);
+ *PValue = base;
+
+ netdata_mutex_unlock(&workers_base_lock);
}
void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type) {
@@ -74,6 +82,10 @@ void worker_register_job_custom_metric(size_t job_id, const char *name, const ch
error("WORKER_UTILIZATION: job_id %zu is too big. Max is %zu", job_id, (size_t)(WORKER_UTILIZATION_MAX_JOB_TYPES - 1));
return;
}
+
+ if(job_id > worker->worker_max_job_id)
+ worker->worker_max_job_id = job_id;
+
if(worker->per_job_type[job_id].name) {
if(strcmp(string2str(worker->per_job_type[job_id].name), name) != 0 || worker->per_job_type[job_id].type != type || strcmp(string2str(worker->per_job_type[job_id].units), units) != 0)
error("WORKER_UTILIZATION: duplicate job registration: worker '%s' job id %zu is '%s', ignoring the later '%s'", worker->workname, job_id, string2str(worker->per_job_type[job_id].name), name);
@@ -92,9 +104,18 @@ void worker_register_job_name(size_t job_id, const char *name) {
void worker_unregister(void) {
if(unlikely(!worker)) return;
- netdata_mutex_lock(&base_lock);
- DOUBLE_LINKED_LIST_REMOVE_UNSAFE(base, worker, prev, next);
- netdata_mutex_unlock(&base_lock);
+ size_t workname_size = strlen(worker->workname) + 1;
+ netdata_mutex_lock(&workers_base_lock);
+ Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)worker->workname, workname_size);
+ if(PValue) {
+ struct worker *base = *PValue;
+ DOUBLE_LINKED_LIST_REMOVE_UNSAFE(base, worker, prev, next);
+ *PValue = base;
+
+ if(!base)
+ JudyHSDel(&workers_per_workname_JudyHS_array, (void *)worker->workname, workname_size, PJE0);
+ }
+ netdata_mutex_unlock(&workers_base_lock);
for(int i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
string_freez(worker->per_job_type[i].name);
@@ -170,6 +191,7 @@ void workers_foreach(const char *workname, void (*callback)(
void *data
, pid_t pid
, const char *thread_tag
+ , size_t max_job_id
, size_t utilization_usec
, size_t duration_usec
, size_t jobs_started, size_t is_running
@@ -181,15 +203,18 @@ void workers_foreach(const char *workname, void (*callback)(
, NETDATA_DOUBLE *job_custom_values
)
, void *data) {
- netdata_mutex_lock(&base_lock);
- uint32_t hash = simple_hash(workname);
+ netdata_mutex_lock(&workers_base_lock);
usec_t busy_time, delta;
size_t i, jobs_started, jobs_running;
+ size_t workname_size = strlen(workname) + 1;
+ struct worker *base = NULL;
+ Pvoid_t *PValue = JudyHSGet(workers_per_workname_JudyHS_array, (void *)workname, workname_size);
+ if(PValue)
+ base = *PValue;
+
struct worker *p;
DOUBLE_LINKED_LIST_FOREACH_FORWARD(base, p, prev, next) {
- if(hash != p->workname_hash || strcmp(workname, p->workname) != 0) continue;
-
usec_t now = now_realtime_usec();
// find per job type statistics
@@ -200,7 +225,8 @@ void workers_foreach(const char *workname, void (*callback)(
usec_t per_job_type_busy_time[WORKER_UTILIZATION_MAX_JOB_TYPES];
NETDATA_DOUBLE per_job_custom_values[WORKER_UTILIZATION_MAX_JOB_TYPES];
- for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
+ size_t max_job_id = p->worker_max_job_id;
+ for(i = 0; i <= max_job_id ;i++) {
per_job_type_name[i] = p->per_job_type[i].name;
per_job_type_units[i] = p->per_job_type[i].units;
per_job_metric_type[i] = p->per_job_type[i].type;
@@ -286,6 +312,7 @@ void workers_foreach(const char *workname, void (*callback)(
callback(data
, p->pid
, p->tag
+ , max_job_id
, busy_time
, delta
, jobs_started
@@ -299,5 +326,5 @@ void workers_foreach(const char *workname, void (*callback)(
);
}
- netdata_mutex_unlock(&base_lock);
+ netdata_mutex_unlock(&workers_base_lock);
}
diff --git a/libnetdata/worker_utilization/worker_utilization.h b/libnetdata/worker_utilization/worker_utilization.h
index 954fcdcba7..04d24f1f73 100644
--- a/libnetdata/worker_utilization/worker_utilization.h
+++ b/libnetdata/worker_utilization/worker_utilization.h
@@ -30,6 +30,7 @@ void workers_foreach(const char *workname, void (*callback)(
void *data
, pid_t pid
, const char *thread_tag
+ , size_t max_job_id
, size_t utilization_usec
, size_t duration_usec
, size_t jobs_started