summaryrefslogtreecommitdiffstats
path: root/database/contexts/query_target.c
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-04-07 21:25:01 +0300
committerGitHub <noreply@github.com>2023-04-07 21:25:01 +0300
commit204dd9ae272445d13f308badb07e99675fa34892 (patch)
treef42e873c60219b5031dcfc3e076adb2398cdb3fe /database/contexts/query_target.c
parent61925baaf6e2448c641e8e71248a47f7a01c4efd (diff)
Boost dbengine (#14832)
* configure extent cache size * workers can now execute up to 10 jobs in a run, boosting query prep and extent reads * fix dispatched and executing counters * boost to the max * increase libuv worker threads * query prep always get more prio than extent reads; stop processing in batch when dbengine is queue is critical * fix accounting of query prep * inlining of time-grouping functions, to speed up queries with billions of points * make switching based on a local const variable * print one pending contexts loading message per iteration * inlined store engine query API * inlined storage engine data collection api * inlined all storage engine query ops * eliminate and inline data collection ops * simplified query group-by * more error handling * optimized partial trimming of group-by queries * preparative work to support multiple passes of group-by * more preparative work to support multiple passes of group-by (accepts multiple group-by params) * unified query timings * unified query timings - weights endpoint * query target is no longer a static thread variable - there is a list of cached query targets, each of which of freed every 1000 queries * fix query memory accounting * added summary.dimension[].pri and sorted summary.dimensions based on priority and then name * limit max ACLK WEB response size to 30MB * the response type should be text/plain * more preparative work for multiple group-by passes * create functions for generating group by keys, ids and names * multiple group-by passes are now supported * parse group-by options array also with an index * implemented percentage-of-instance group by function * family is now merged in multi-node contexts * prevent uninitialized use
Diffstat (limited to 'database/contexts/query_target.c')
-rw-r--r--database/contexts/query_target.c201
1 files changed, 130 insertions, 71 deletions
diff --git a/database/contexts/query_target.c b/database/contexts/query_target.c
index 0378f52017..19ab8caadd 100644
--- a/database/contexts/query_target.c
+++ b/database/contexts/query_target.c
@@ -12,42 +12,55 @@ static void query_instance_release(QUERY_INSTANCE *qi);
static void query_context_release(QUERY_CONTEXT *qc);
static void query_node_release(QUERY_NODE *qn);
-static __thread QUERY_TARGET thread_query_target = {};
-
-// ----------------------------------------------------------------------------
-// query API
-
-typedef struct query_target_locals {
- time_t start_s;
-
- QUERY_TARGET *qt;
+static __thread QUERY_TARGET *thread_qt = NULL;
+static struct {
+ struct {
+ SPINLOCK spinlock;
+ size_t count;
+ QUERY_TARGET *base;
+ } available;
- RRDSET *st;
+ struct {
+ SPINLOCK spinlock;
+ size_t count;
+ QUERY_TARGET *base;
+ } used;
+} query_target_base = {
+ .available = {
+ .spinlock = NETDATA_SPINLOCK_INITIALIZER,
+ .base = NULL,
+ .count = 0,
+ },
+ .used = {
+ .spinlock = NETDATA_SPINLOCK_INITIALIZER,
+ .base = NULL,
+ .count = 0,
+ },
+};
+
+static void query_target_destroy(QUERY_TARGET *qt) {
+ __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->query.size * sizeof(*qt->query.array), __ATOMIC_RELAXED);
+ freez(qt->query.array);
- const char *scope_nodes;
- const char *scope_contexts;
+ __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->dimensions.size * sizeof(*qt->dimensions.array), __ATOMIC_RELAXED);
+ freez(qt->dimensions.array);
- const char *nodes;
- const char *contexts;
- const char *instances;
- const char *dimensions;
- const char *chart_label_key;
- const char *labels;
- const char *alerts;
+ __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->instances.size * sizeof(*qt->instances.array), __ATOMIC_RELAXED);
+ freez(qt->instances.array);
- long long after;
- long long before;
- bool match_ids;
- bool match_names;
+ __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->contexts.size * sizeof(*qt->contexts.array), __ATOMIC_RELAXED);
+ freez(qt->contexts.array);
- size_t metrics_skipped_due_to_not_matching_timeframe;
+ __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->nodes.size * sizeof(*qt->nodes.array), __ATOMIC_RELAXED);
+ freez(qt->nodes.array);
- char host_uuid_buffer[UUID_STR_LEN];
- QUERY_NODE *qn; // temp to pass on callbacks, ignore otherwise - no need to free
-} QUERY_TARGET_LOCALS;
+ freez(qt);
+}
void query_target_release(QUERY_TARGET *qt) {
- if(unlikely(!qt || !qt->used)) return;
+ if(unlikely(!qt)) return;
+
+ internal_fatal(!qt->internal.used, "QUERY TARGET: qt to be released is not used");
simple_pattern_free(qt->nodes.scope_pattern);
qt->nodes.scope_pattern = NULL;
@@ -113,44 +126,91 @@ void query_target_release(QUERY_TARGET *qt) {
qt->db.first_time_s = 0;
qt->db.last_time_s = 0;
- qt->group_by.used = 0;
+ for(size_t g = 0; g < MAX_QUERY_GROUP_BY_PASSES ;g++)
+ qt->group_by[g].used = 0;
qt->id[0] = '\0';
- qt->used = false;
+ netdata_spinlock_lock(&query_target_base.used.spinlock);
+ DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(query_target_base.used.base, qt, internal.prev, internal.next);
+ query_target_base.used.count--;
+ netdata_spinlock_unlock(&query_target_base.used.spinlock);
+
+ qt->internal.used = false;
+ thread_qt = NULL;
+
+ if (qt->internal.queries > 1000) {
+ query_target_destroy(qt);
+ }
+ else {
+ netdata_spinlock_lock(&query_target_base.available.spinlock);
+ DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(query_target_base.available.base, qt, internal.prev, internal.next);
+ query_target_base.available.count++;
+ netdata_spinlock_unlock(&query_target_base.available.spinlock);
+ }
}
-void query_target_free(void) {
- QUERY_TARGET *qt = &thread_query_target;
- if(qt->used)
- query_target_release(qt);
+static QUERY_TARGET *query_target_get(void) {
+ netdata_spinlock_lock(&query_target_base.available.spinlock);
+ QUERY_TARGET *qt = query_target_base.available.base;
+ if (qt) {
+ DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(query_target_base.available.base, qt, internal.prev, internal.next);
+ query_target_base.available.count--;
+ }
+ netdata_spinlock_unlock(&query_target_base.available.spinlock);
- __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->query.size * sizeof(QUERY_METRIC), __ATOMIC_RELAXED);
- freez(qt->query.array);
- qt->query.array = NULL;
- qt->query.size = 0;
+ if(unlikely(!qt))
+ qt = callocz(1, sizeof(*qt));
- __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->dimensions.size * sizeof(RRDMETRIC_ACQUIRED *), __ATOMIC_RELAXED);
- freez(qt->dimensions.array);
- qt->dimensions.array = NULL;
- qt->dimensions.size = 0;
+ netdata_spinlock_lock(&query_target_base.used.spinlock);
+ DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(query_target_base.used.base, qt, internal.prev, internal.next);
+ query_target_base.used.count++;
+ netdata_spinlock_unlock(&query_target_base.used.spinlock);
- __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->instances.size * sizeof(RRDINSTANCE_ACQUIRED *), __ATOMIC_RELAXED);
- freez(qt->instances.array);
- qt->instances.array = NULL;
- qt->instances.size = 0;
+ qt->internal.used = true;
+ qt->internal.queries++;
+ thread_qt = qt;
- __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->contexts.size * sizeof(RRDCONTEXT_ACQUIRED *), __ATOMIC_RELAXED);
- freez(qt->contexts.array);
- qt->contexts.array = NULL;
- qt->contexts.size = 0;
+ return qt;
+}
- __atomic_sub_fetch(&netdata_buffers_statistics.query_targets_size, qt->nodes.size * sizeof(RRDHOST *), __ATOMIC_RELAXED);
- freez(qt->nodes.array);
- qt->nodes.array = NULL;
- qt->nodes.size = 0;
+// this is used to release a query target from a cancelled thread
+void query_target_free(void) {
+ query_target_release(thread_qt);
}
+// ----------------------------------------------------------------------------
+// query API
+
+typedef struct query_target_locals {
+ time_t start_s;
+
+ QUERY_TARGET *qt;
+
+ RRDSET *st;
+
+ const char *scope_nodes;
+ const char *scope_contexts;
+
+ const char *nodes;
+ const char *contexts;
+ const char *instances;
+ const char *dimensions;
+ const char *chart_label_key;
+ const char *labels;
+ const char *alerts;
+
+ long long after;
+ long long before;
+ bool match_ids;
+ bool match_names;
+
+ size_t metrics_skipped_due_to_not_matching_timeframe;
+
+ char host_uuid_buffer[UUID_STR_LEN];
+ QUERY_NODE *qn; // temp to pass on callbacks, ignore otherwise - no need to free
+} QUERY_TARGET_LOCALS;
+
struct storage_engine *query_metric_storage_engine(QUERY_TARGET *qt, QUERY_METRIC *qm, size_t tier) {
QUERY_NODE *qn = query_node(qt, qm->link.query_node_id);
return qn->rrdhost->db[tier].eng;
@@ -198,8 +258,8 @@ static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CON
tier_retention[tier].db_metric_handle = eng->api.metric_get(qn->rrdhost->db[tier].instance, &rm->uuid);
if(tier_retention[tier].db_metric_handle) {
- tier_retention[tier].db_first_time_s = tier_retention[tier].eng->api.query_ops.oldest_time_s(tier_retention[tier].db_metric_handle);
- tier_retention[tier].db_last_time_s = tier_retention[tier].eng->api.query_ops.latest_time_s(tier_retention[tier].db_metric_handle);
+ tier_retention[tier].db_first_time_s = storage_engine_oldest_time_s(tier_retention[tier].eng->backend, tier_retention[tier].db_metric_handle);
+ tier_retention[tier].db_last_time_s = storage_engine_latest_time_s(tier_retention[tier].eng->backend, tier_retention[tier].db_metric_handle);
if(!common_first_time_s)
common_first_time_s = tier_retention[tier].db_first_time_s;
@@ -301,7 +361,7 @@ static inline void query_dimension_release(QUERY_DIMENSION *qd) {
qd->rma = NULL;
}
-static QUERY_DIMENSION *query_dimension_allocate(QUERY_TARGET *qt, RRDMETRIC_ACQUIRED *rma, QUERY_STATUS status) {
+static QUERY_DIMENSION *query_dimension_allocate(QUERY_TARGET *qt, RRDMETRIC_ACQUIRED *rma, QUERY_STATUS status, size_t priority) {
if(qt->dimensions.used == qt->dimensions.size) {
size_t old_mem = qt->dimensions.size * sizeof(*qt->dimensions.array);
qt->dimensions.size = query_target_realloc_size(qt->dimensions.size, 4);
@@ -316,12 +376,13 @@ static QUERY_DIMENSION *query_dimension_allocate(QUERY_TARGET *qt, RRDMETRIC_ACQ
qd->slot = qt->dimensions.used++;
qd->rma = rrdmetric_acquired_dup(rma);
qd->status = status;
+ qd->priority = priority;
return qd;
}
static bool query_dimension_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CONTEXT *qc, QUERY_INSTANCE *qi,
- RRDMETRIC_ACQUIRED *rma, bool queryable_instance, size_t *metrics_added) {
+ RRDMETRIC_ACQUIRED *rma, bool queryable_instance, size_t *metrics_added, size_t priority) {
QUERY_TARGET *qt = qtl->qt;
RRDMETRIC *rm = rrdmetric_acquired_value(rma);
@@ -364,7 +425,7 @@ static bool query_dimension_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_
// the user selection does not match this dimension
// but, we may still need to query it
- if (qt->request.options & RRDR_OPTION_PERCENTAGE) {
+ if (query_target_needs_all_dimensions(qt)) {
// this is percentage calculation
// so, we need this dimension to calculate the percentage
needed = true;
@@ -389,7 +450,7 @@ static bool query_dimension_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_
status |= QUERY_STATUS_DIMENSION_HIDDEN;
options |= RRDR_DIMENSION_HIDDEN;
- if (qt->request.options & RRDR_OPTION_PERCENTAGE) {
+ if (query_target_needs_all_dimensions(qt)) {
// this is percentage calculation
// so, we need this dimension to calculate the percentage
needed = true;
@@ -430,7 +491,7 @@ static bool query_dimension_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_
if(undo)
return false;
- query_dimension_allocate(qt, rma, status);
+ query_dimension_allocate(qt, rma, status, priority);
return true;
}
@@ -694,17 +755,17 @@ static bool query_instance_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_C
if(queryable_instance && qt->request.version >= 2)
query_target_eval_instance_rrdcalc(qtl, qn, qc, qi);
- size_t dimensions_added = 0, metrics_added = 0;
+ size_t dimensions_added = 0, metrics_added = 0, priority = 0;
if(unlikely(qt->request.rma)) {
- if(query_dimension_add(qtl, qn, qc, qi, qt->request.rma, queryable_instance, &metrics_added))
+ if(query_dimension_add(qtl, qn, qc, qi, qt->request.rma, queryable_instance, &metrics_added, priority++))
dimensions_added++;
}
else {
RRDMETRIC *rm;
dfe_start_read(ri->rrdmetrics, rm) {
if(query_dimension_add(qtl, qn, qc, qi, (RRDMETRIC_ACQUIRED *) rm_dfe.item,
- queryable_instance, &metrics_added))
+ queryable_instance, &metrics_added, priority++))
dimensions_added++;
}
dfe_done(rm);
@@ -957,13 +1018,7 @@ QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) {
if(!service_running(ABILITY_DATA_QUERIES))
return NULL;
- QUERY_TARGET *qt = &thread_query_target;
-
- if(qt->used)
- fatal("QUERY TARGET: this query target is already used (%zu queries made with this QUERY_TARGET so far).", qt->queries);
-
- qt->used = true;
- qt->queries++;
+ QUERY_TARGET *qt = query_target_get();
if(!qtr->received_ut)
qtr->received_ut = now_monotonic_usec();
@@ -985,7 +1040,11 @@ QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) {
query_target_generate_name(qt);
qt->window.after = qt->request.after;
qt->window.before = qt->request.before;
+
qt->window.options = qt->request.options;
+ if(query_target_has_percentage_of_instance(qt))
+ qt->window.options &= ~RRDR_OPTION_PERCENTAGE;
+
rrdr_relative_window_to_absolute(&qt->window.after, &qt->window.before, &qt->window.now);
// prepare our local variables - we need these across all these functions