diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-01-27 01:32:20 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-27 01:32:20 +0200 |
commit | 57eab742c88093c89d5d46deb495558ad726e6f0 (patch) | |
tree | e8a01519a8f9df7beba4d0be7be53a9be3f1fdfd /web | |
parent | c4f5524ea8279be492eb527a67242b408543382e (diff) |
DBENGINE v2 - improvements part 10 (#14332)
* replication cancels pending queries on exit
* log when waiting for inflight queries
* when there are both collected and not-collected metrics, use the context priority from the collected ones only
* Write metadata at a faster pace
* Remove journal file size limit and sync mode to 0 / Drop wal checkpoint for now
* Wrap in a big transaction remaining metadata writes (test 1)
* fix higher tiers when tiering iterations = 2
* dbengine always returns db-aligned points; query engine expands the queries by 2 points in every direction to have enough data for interpolation
* Wrap in a big transaction metadata writes (test 2)
* replication cancelling fix
* do not send the first and last entry in replication when the db has no retention
* fix internal check condition
* Increase metadata write batch size
* always apply error limit to dbengine logs
* Remove code that processes the obsolete health.db files
* cleanup in query.c
* do not allow queries to go beyond db boundaries
* prevent internal log for +1 delta in timestamp
* detect gap pages in conflicts
* double protection for gap injection in main cache
* Add checkpoint to prevent large WAL while running
Remove unused and duplicate functions
* do not allocate chart cache dir if not needed
* add more info to unittests
* revert query expansion to satisfy unittests
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'web')
-rw-r--r-- | web/api/queries/query.c | 48 |
1 files changed, 31 insertions, 17 deletions
diff --git a/web/api/queries/query.c b/web/api/queries/query.c index f2c4852677..271e8c0a5b 100644 --- a/web/api/queries/query.c +++ b/web/api/queries/query.c @@ -17,6 +17,8 @@ #include "percentile/percentile.h" #include "trimmed_mean/trimmed_mean.h" +#define POINTS_TO_EXPAND_QUERY 0 + // ---------------------------------------------------------------------------- static struct { @@ -957,10 +959,12 @@ static void query_planer_initialize_plans(QUERY_ENGINE_OPS *ops) { QUERY_METRIC *qm = ops->qm; for(size_t p = 0; p < qm->plan.used ; p++) { - time_t after = qm->plan.array[p].after; - time_t before = qm->plan.array[p].before; - size_t tier = qm->plan.array[p].tier; + time_t update_every = qm->tiers[tier].db_update_every_s; + + time_t after = qm->plan.array[p].after - (update_every * POINTS_TO_EXPAND_QUERY); + time_t before = qm->plan.array[p].before + (update_every * POINTS_TO_EXPAND_QUERY); + struct query_metric_tier *tier_ptr = &qm->tiers[tier]; tier_ptr->eng->api.query_ops.init( tier_ptr->db_metric_handle, @@ -1180,11 +1184,6 @@ static bool query_plan(QUERY_ENGINE_OPS *ops, time_t after_wanted, time_t before } #endif - for(size_t p = 0; p < qm->plan.used ;p++) { - size_t tier = qm->plan.array[p].tier; - qm->plan.array[p].before += qm->tiers[tier].db_update_every_s - 1; - } - query_planer_initialize_plans(ops); query_planer_activate_plan(ops, 0, 0); @@ -1361,19 +1360,30 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_ } // check if the db is giving us zero duration points - if(unlikely(db_points_read_since_plan_switch > 1 && new_point.start_time == new_point.end_time)) { - internal_error(true, "QUERY: '%s', dimension '%s' next_metric() returned point %zu start time %ld, end time %ld, that are both equal", - qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time); + if(unlikely(db_points_read_since_plan_switch > 1 && + new_point.start_time == new_point.end_time)) { + + internal_error(true, 
"QUERY: '%s', dimension '%s' next_metric() returned " + "point %zu from %ld to %ld, that are both equal", + qt->id, string2str(qm->dimension.id), + new_point.id, new_point.start_time, new_point.end_time); new_point.start_time = new_point.end_time - ops->tier_ptr->db_update_every_s; } // check if the db is advancing the query - if(unlikely(db_points_read_since_plan_switch > 1 && new_point.end_time <= last1_point.end_time)) { + if(unlikely(db_points_read_since_plan_switch > 1 && + new_point.end_time <= last1_point.end_time)) { + internal_error(true, - "QUERY: '%s', dimension '%s' next_metric() returned point %zu from %ld to %ld, before the last point %zu from %ld to %ld, now is %ld to %ld", - qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time, - last1_point.id, last1_point.start_time, last1_point.end_time, now_start_time, now_end_time); + "QUERY: '%s', dimension '%s' next_metric() returned " + "point %zu from %ld to %ld, before the " + "last point %zu from %ld to %ld, " + "now is %ld to %ld", + qt->id, string2str(qm->dimension.id), + new_point.id, new_point.start_time, new_point.end_time, + last1_point.id, last1_point.start_time, last1_point.end_time, + now_start_time, now_end_time); count_same_end_time++; continue; @@ -1398,8 +1408,12 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_ // at exactly the time we will want // we only log if this is not point 1 - internal_error(new_point.end_time < after_wanted && new_point.id > 1, - "QUERY: '%s', dimension '%s' next_metric() returned point %zu from %ld time %ld, which is entirely before our current timeframe %ld to %ld (and before the entire query, after %ld, before %ld)", + internal_error(new_point.end_time < after_wanted && + new_point.id > POINTS_TO_EXPAND_QUERY + 1, + "QUERY: '%s', dimension '%s' next_metric() " + "returned point %zu from %ld time %ld, " + "which is entirely before our current timeframe %ld to %ld " + "(and before the entire 
query, after %ld, before %ld)", qt->id, string2str(qm->dimension.id), new_point.id, new_point.start_time, new_point.end_time, now_start_time, now_end_time, |