summaryrefslogtreecommitdiffstats
path: root/database
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2023-01-27 01:32:20 +0200
committer GitHub <noreply@github.com> 2023-01-27 01:32:20 +0200
commit 57eab742c88093c89d5d46deb495558ad726e6f0 (patch)
tree e8a01519a8f9df7beba4d0be7be53a9be3f1fdfd /database
parent c4f5524ea8279be492eb527a67242b408543382e (diff)
DBENGINE v2 - improvements part 10 (#14332)
* replication cancels pending queries on exit
* log when waiting for inflight queries
* when there are collected and not-collected metrics, use the context priority from the collected only
* Write metadata with a faster pace
* Remove journal file size limit and sync mode to 0 / Drop wal checkpoint for now
* Wrap in a big transaction remaining metadata writes (test 1)
* fix higher tiers when tiering iterations = 2
* dbengine always returns db-aligned points; query engine expands the queries by 2 points in every direction to have enough data for interpolation
* Wrap in a big transaction metadata writes (test 2)
* replication cancelling fix
* do not first and last entry in replication when the db has no retention
* fix internal check condition
* Increase metadata write batch size
* always apply error limit to dbengine logs
* Remove code that processes the obsolete health.db files
* cleanup in query.c
* do not allow queries to go beyond db boundaries
* prevent internal log for +1 delta in timestamp
* detect gap pages in conflicts
* double protection for gap injection in main cache
* Add checkpoint to prevent large WAL while running Remove unused and duplicate functions
* do not allocate chart cache dir if not needed
* add more info to unittests
* revert query expansion to satisfy unittests

Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'database')
-rw-r--r-- database/engine/metric.c 39
-rw-r--r-- database/engine/metric.h 6
-rw-r--r-- database/engine/pagecache.c 14
-rw-r--r-- database/engine/pdc.c 3
-rw-r--r-- database/engine/rrdengine.c 10
-rwxr-xr-x database/engine/rrdengineapi.c 98
-rw-r--r-- database/rrd.c 2
-rw-r--r-- database/rrd.h 7
-rw-r--r-- database/rrdcontext.c 21
-rw-r--r-- database/rrddim.c 2
-rw-r--r-- database/rrdhost.c 33
-rw-r--r-- database/rrdset.c 35
-rw-r--r-- database/sqlite/sqlite_functions.c 110
-rw-r--r-- database/sqlite/sqlite_functions.h 2
-rw-r--r-- database/sqlite/sqlite_health.c 6
-rw-r--r-- database/sqlite/sqlite_metadata.c 11
16 files changed, 190 insertions, 209 deletions
diff --git a/database/engine/metric.c b/database/engine/metric.c
index af769fda95..d16bc063d9 100644
--- a/database/engine/metric.c
+++ b/database/engine/metric.c
@@ -327,33 +327,42 @@ bool mrg_metric_set_first_time_s_if_bigger(MRG *mrg __maybe_unused, METRIC *metr
return ret;
}
-bool mrg_metric_set_first_time_s_if_zero(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s) {
- bool ret = false;
+time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
+ time_t first_time_s;
netdata_spinlock_lock(&metric->spinlock);
- if(!metric->first_time_s) {
- metric->first_time_s = first_time_s;
- ret = true;
+
+ if(unlikely(!metric->first_time_s)) {
+ if(metric->latest_time_s_clean)
+ metric->first_time_s = metric->latest_time_s_clean;
+
+ else if(metric->latest_time_s_hot)
+ metric->first_time_s = metric->latest_time_s_hot;
}
+
+ first_time_s = metric->first_time_s;
+
netdata_spinlock_unlock(&metric->spinlock);
- return ret;
+ return first_time_s;
}
-time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t first_time_s;
+void mrg_metric_get_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s) {
netdata_spinlock_lock(&metric->spinlock);
- first_time_s = metric->first_time_s;
- if(!first_time_s) {
+
+ if(unlikely(!metric->first_time_s)) {
if(metric->latest_time_s_clean)
- first_time_s = metric->latest_time_s_clean;
+ metric->first_time_s = metric->latest_time_s_clean;
- if(!first_time_s || metric->latest_time_s_hot < metric->latest_time_s_clean)
- first_time_s = metric->latest_time_s_hot;
+ else if(metric->latest_time_s_hot)
+ metric->first_time_s = metric->latest_time_s_hot;
}
- netdata_spinlock_unlock(&metric->spinlock);
- return first_time_s;
+ *first_time_s = metric->first_time_s;
+ *last_time_s = MAX(metric->latest_time_s_clean, metric->latest_time_s_hot);
+ *update_every_s = metric->latest_update_every_s;
+
+ netdata_spinlock_unlock(&metric->spinlock);
}
bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
diff --git a/database/engine/metric.h b/database/engine/metric.h
index 3eb2c97469..fe0481a1b1 100644
--- a/database/engine/metric.h
+++ b/database/engine/metric.h
@@ -46,18 +46,18 @@ Word_t mrg_metric_section(MRG *mrg, METRIC *metric);
bool mrg_metric_set_first_time_s(MRG *mrg, METRIC *metric, time_t first_time_s);
bool mrg_metric_set_first_time_s_if_bigger(MRG *mrg, METRIC *metric, time_t first_time_s);
-bool mrg_metric_set_first_time_s_if_zero(MRG *mrg, METRIC *metric, time_t first_time_s);
time_t mrg_metric_get_first_time_s(MRG *mrg, METRIC *metric);
-void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s);
bool mrg_metric_set_clean_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s);
bool mrg_metric_set_hot_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s);
time_t mrg_metric_get_latest_time_s(MRG *mrg, METRIC *metric);
bool mrg_metric_set_update_every(MRG *mrg, METRIC *metric, time_t update_every_s);
+bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, time_t update_every_s);
time_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric);
-bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, time_t update_every_s);
+void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s);
+void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s);
bool mrg_metric_writer_acquire(MRG *mrg, METRIC *metric);
bool mrg_metric_writer_release(MRG *mrg, METRIC *metric);
diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c
index 961c2b2886..11f63751b8 100644
--- a/database/engine/pagecache.c
+++ b/database/engine/pagecache.c
@@ -356,17 +356,27 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin
}
static void pgc_inject_gap(struct rrdengine_instance *ctx, METRIC *metric, time_t start_time_s, time_t end_time_s) {
+
+ time_t db_first_time_s, db_last_time_s, db_update_every_s;
+ mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
+
+ if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) != PAGE_IS_IN_RANGE)
+ return;
+
PGC_ENTRY page_entry = {
.hot = false,
.section = (Word_t)ctx,
.metric_id = (Word_t)metric,
- .start_time_s = start_time_s,
- .end_time_s = end_time_s,
+ .start_time_s = MAX(start_time_s, db_first_time_s),
+ .end_time_s = MIN(end_time_s, db_last_time_s),
.update_every_s = 0,
.size = 0,
.data = DBENGINE_EMPTY_PAGE,
};
+ if(page_entry.start_time_s >= page_entry.end_time_s)
+ return;
+
PGC_PAGE *page = pgc_page_add_and_acquire(main_cache, page_entry, NULL);
pgc_page_release(main_cache, page);
}
diff --git a/database/engine/pdc.c b/database/engine/pdc.c
index d0daaa5c12..0563133719 100644
--- a/database/engine/pdc.c
+++ b/database/engine/pdc.c
@@ -606,6 +606,9 @@ void pdc_acquire(PDC *pdc) {
}
bool pdc_release_and_destroy_if_unreferenced(PDC *pdc, bool worker, bool router __maybe_unused) {
+ if(unlikely(!pdc))
+ return true;
+
netdata_spinlock_lock(&pdc->refcount_spinlock);
if(pdc->refcount <= 0)
diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c
index ed1d851b1f..dbc017aafb 100644
--- a/database/engine/rrdengine.c
+++ b/database/engine/rrdengine.c
@@ -1620,9 +1620,17 @@ static void *ctx_shutdown_tp_worker(struct rrdengine_instance *ctx __maybe_unuse
completion_wait_for(&ctx->quiesce.completion);
completion_destroy(&ctx->quiesce.completion);
+ bool logged = false;
while(__atomic_load_n(&ctx->atomic.extents_currently_being_flushed, __ATOMIC_RELAXED) ||
- __atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED))
+ __atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED)) {
+ if(!logged) {
+ logged = true;
+ info("DBENGINE: waiting for %zu inflight queries to finish to shutdown tier %d...",
+ __atomic_load_n(&ctx->atomic.inflight_queries, __ATOMIC_RELAXED),
+ (ctx->config.legacy) ? -1 : ctx->config.tier);
+ }
sleep_usec(1 * USEC_PER_MS);
+ }
completion_mark_complete(completion);
diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c
index 22fe30e4fd..b7d2eae3b3 100755
--- a/database/engine/rrdengineapi.c
+++ b/database/engine/rrdengineapi.c
@@ -266,16 +266,19 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri
if(!is_1st_metric_writer)
__atomic_add_fetch(&ctx->atomic.collectors_running_duplicate, 1, __ATOMIC_RELAXED);
+ mrg_metric_set_update_every(main_mrg, metric, update_every);
+
+ handle->alignment = (struct pg_alignment *)smg;
+ rrdeng_page_alignment_acquire(handle->alignment);
+
// this is important!
// if we don't set the page_end_time_ut during the first collection
// data collection may be able to go back in time and during the addition of new pages
// clean pages may be found matching ours!
- handle->page_end_time_ut = (usec_t)mrg_metric_get_latest_time_s(main_mrg, metric) * USEC_PER_SEC;
- mrg_metric_set_update_every(main_mrg, metric, update_every);
-
- handle->alignment = (struct pg_alignment *)smg;
- rrdeng_page_alignment_acquire(handle->alignment);
+ time_t db_first_time_s, db_last_time_s, db_update_every_s;
+ mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
+ handle->page_end_time_ut = (usec_t)db_last_time_s * USEC_PER_SEC;
return (STORAGE_COLLECT_HANDLE *)handle;
}
@@ -382,11 +385,12 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha
error_limit(&erl,
#endif
"DBENGINE: metric '%s' new page from %ld to %ld, update every %ld, has a conflict in main cache "
- "with existing %s page from %ld to %ld, update every %ld - "
+ "with existing %s%s page from %ld to %ld, update every %ld - "
"is it collected more than once?",
uuid,
page_entry.start_time_s, page_entry.end_time_s, (time_t)page_entry.update_every_s,
pgc_is_page_hot(page) ? "hot" : "not-hot",
+ pgc_page_data(page) == DBENGINE_EMPTY_PAGE ? " gap" : "",
pgc_page_start_time_s(page), pgc_page_end_time_s(page), pgc_page_update_every_s(page)
);
@@ -580,12 +584,8 @@ static void store_metric_next_error_log(struct rrdeng_collect_handle *handle, us
collect_page_flags_to_buffer(wb, handle->page_flags);
}
-#ifdef NETDATA_INTERNAL_CHECKS
- internal_error(true,
-#else
error_limit_static_global_var(erl, 1, 0);
error_limit(&erl,
-#endif
"DBENGINE: metric '%s' collected point at %ld, %s last collection at %ld, "
"update every %ld, %s page from %ld to %ld, position %u (of %u), flags: %s",
uuid,
@@ -699,8 +699,8 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) {
if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_writer_release(main_mrg, handle->metric))
internal_fatal(true, "DBENGINE: metric is already released");
- time_t first_time_s = mrg_metric_get_first_time_s(main_mrg, handle->metric);
- time_t last_time_s = mrg_metric_get_latest_time_s(main_mrg, handle->metric);
+ time_t first_time_s, last_time_s, update_every_s;
+ mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, &update_every_s);
mrg_metric_release(main_mrg, handle->metric);
freez(handle);
@@ -759,7 +759,11 @@ static void unregister_query_handle(struct rrdeng_query_handle *handle __maybe_u
* Gets a handle for loading metrics from the database.
* The handle must be released with rrdeng_load_metric_final().
*/
-void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrddim_handle, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority)
+void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle,
+ struct storage_engine_query_handle *rrddim_handle,
+ time_t start_time_s,
+ time_t end_time_s,
+ STORAGE_PRIORITY priority)
{
usec_t started_ut = now_monotonic_usec();
@@ -769,8 +773,6 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct sto
struct rrdengine_instance *ctx = mrg_metric_ctx(metric);
struct rrdeng_query_handle *handle;
- mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every);
-
handle = rrdeng_query_handle_get();
register_query_handle(handle);
@@ -781,23 +783,48 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct sto
handle->ctx = ctx;
handle->metric = metric;
- handle->start_time_s = start_time_s;
- handle->end_time_s = end_time_s;
handle->priority = priority;
- handle->now_s = start_time_s;
- handle->dt_s = mrg_metric_get_update_every_s(main_mrg, metric);
- if(!handle->dt_s)
- handle->dt_s = default_rrd_update_every;
+ // IMPORTANT!
+ // It is crucial not to exceed the db boundaries, because dbengine
+ // now has gap caching, so when a gap is detected a negative page
+ // is inserted into the main cache, to avoid scanning the journals
+ // again for pages matching the gap.
- rrddim_handle->handle = (STORAGE_QUERY_HANDLE *)handle;
- rrddim_handle->start_time_s = start_time_s;
- rrddim_handle->end_time_s = end_time_s;
- rrddim_handle->priority = priority;
+ time_t db_first_time_s, db_last_time_s, db_update_every_s;
+ mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s);
- pg_cache_preload(handle);
+ if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) == PAGE_IS_IN_RANGE) {
+ handle->start_time_s = MAX(start_time_s, db_first_time_s);
+ handle->end_time_s = MIN(end_time_s, db_last_time_s);
+ handle->now_s = handle->start_time_s;
- __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_init, now_monotonic_usec() - started_ut, __ATOMIC_RELAXED);
+ handle->dt_s = db_update_every_s;
+ if (!handle->dt_s) {
+ handle->dt_s = default_rrd_update_every;
+ mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every);
+ }
+
+ rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
+ rrddim_handle->start_time_s = handle->start_time_s;
+ rrddim_handle->end_time_s = handle->end_time_s;
+ rrddim_handle->priority = priority;
+
+ pg_cache_preload(handle);
+
+ __atomic_add_fetch(&rrdeng_cache_efficiency_stats.query_time_init, now_monotonic_usec() - started_ut, __ATOMIC_RELAXED);
+ }
+ else {
+ handle->start_time_s = start_time_s;
+ handle->end_time_s = end_time_s;
+ handle->now_s = start_time_s;
+ handle->dt_s = db_update_every_s;
+
+ rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle;
+ rrddim_handle->start_time_s = handle->start_time_s;
+ rrddim_handle->end_time_s = 0;
+ rrddim_handle->priority = priority;
+ }
}
static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_handle, bool debug_this __maybe_unused) {
@@ -827,10 +854,19 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han
unsigned position;
if(likely(handle->now_s >= page_start_time_s && handle->now_s <= page_end_time_s)) {
- if(unlikely(entries == 1 || page_start_time_s == page_end_time_s))
+ if(unlikely(entries == 1 || page_start_time_s == page_end_time_s || !page_update_every_s)) {
position = 0;
- else
+ handle->now_s = page_start_time_s;
+ }
+ else {
position = (handle->now_s - page_start_time_s) * (entries - 1) / (page_end_time_s - page_start_time_s);
+ time_t point_end_time_s = page_start_time_s + position * page_update_every_s;
+ if(point_end_time_s < handle->now_s && position + 1 < entries) {
+ position++;
+ point_end_time_s = page_start_time_s + position * page_update_every_s;
+ }
+ handle->now_s = point_end_time_s;
+ }
internal_fatal(position >= entries, "DBENGINE: wrong page position calculation");
}
@@ -986,8 +1022,8 @@ bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_
if (unlikely(!metric))
return false;
- *first_entry_s = mrg_metric_get_first_time_s(main_mrg, metric);
- *last_entry_s = mrg_metric_get_latest_time_s(main_mrg, metric);
+ time_t update_every_s;
+ mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, &update_every_s);
mrg_metric_release(main_mrg, metric);
diff --git a/database/rrd.c b/database/rrd.c
index df364419ea..d489ddb8b1 100644
--- a/database/rrd.c
+++ b/database/rrd.c
@@ -135,7 +135,7 @@ const char *rrdset_type_name(RRDSET_TYPE chart_type) {
// ----------------------------------------------------------------------------
// RRD - cache directory
-char *rrdset_cache_dir(RRDHOST *host, const char *id) {
+char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id) {
char *ret = NULL;
char b[FILENAME_MAX + 1];
diff --git a/database/rrd.h b/database/rrd.h
index 1ae53b47e5..1128f7c6ab 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -314,7 +314,7 @@ typedef struct storage_collect_handle STORAGE_COLLECT_HANDLE;
struct rrddim_tier {
STORAGE_POINT virtual_point;
size_t tier_grouping;
- time_t next_point_time_s;
+ time_t next_point_end_time_s;
STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database
STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle
struct storage_engine_collect_ops *collect_ops;
@@ -905,9 +905,7 @@ typedef struct health {
time_t health_delay_up_to; // a timestamp to delay alarms processing up to
STRING *health_default_exec; // the full path of the alarms notifications program
STRING *health_default_recipient; // the default recipient for all alarms
- char *health_log_filename; // the alarms event log filename
size_t health_log_entries_written; // the number of alarm events written to the alarms event log
- FILE *health_log_fp; // the FILE pointer to the open alarms event log file
uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications
uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications
} HEALTH;
@@ -1340,7 +1338,8 @@ void rrdset_free(RRDSET *st);
#ifdef NETDATA_RRD_INTERNALS
-char *rrdset_cache_dir(RRDHOST *host, const char *id);
+char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id);
+const char *rrdset_cache_dir(RRDSET *st);
void rrddim_free(RRDSET *st, RRDDIM *rd);
diff --git a/database/rrdcontext.c b/database/rrdcontext.c
index c261c832f3..8d019dafba 100644
--- a/database/rrdcontext.c
+++ b/database/rrdcontext.c
@@ -3446,6 +3446,8 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
if(worker_jobs)
worker_is_busy(WORKER_JOB_PP_CONTEXT);
+ size_t min_priority_collected = LONG_MAX;
+ size_t min_priority_not_collected = LONG_MAX;
size_t min_priority = LONG_MAX;
time_t min_first_time_t = LONG_MAX, max_last_time_t = 0;
size_t instances_active = 0, instances_deleted = 0;
@@ -3482,8 +3484,16 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
instances_active++;
- if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY && ri->priority < min_priority)
- min_priority = ri->priority;
+ if (ri->priority >= RRDCONTEXT_MINIMUM_ALLOWED_PRIORITY) {
+ if(rrd_flag_check(ri, RRD_FLAG_COLLECTED)) {
+ if(ri->priority < min_priority_collected)
+ min_priority_collected = ri->priority;
+ }
+ else {
+ if(ri->priority < min_priority_not_collected)
+ min_priority_not_collected = ri->priority;
+ }
+ }
if (ri->first_time_s && ri->first_time_s < min_first_time_t)
min_first_time_t = ri->first_time_s;
@@ -3492,6 +3502,13 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
max_last_time_t = ri->last_time_s;
}
dfe_done(ri);
+
+ if(min_priority_collected != LONG_MAX)
+ // use the collected priority
+ min_priority = min_priority_collected;
+ else
+ // use the non-collected priority
+ min_priority = min_priority_not_collected;
}
{
diff --git a/database/rrddim.c b/database/rrddim.c
index b520f21d3d..6846b0d42c 100644
--- a/database/rrddim.c
+++ b/database/rrddim.c
@@ -686,7 +686,7 @@ bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MO
char filename[FILENAME_MAX + 1];
char fullfilename[FILENAME_MAX + 1];
rrdset_strncpyz_name(filename, rrddim_id(rd), FILENAME_MAX);
- snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename);
+ snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", rrdset_cache_dir(st), filename);
rd_on_file = (struct rrddim_map_save_v019 *)netdata_mmap(
fullfilename, size, ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE), 1, false, NULL);
diff --git a/database/rrdhost.c b/database/rrdhost.c
index b25fc72d21..454fd6b809 100644
--- a/database/rrdhost.c
+++ b/database/rrdhost.c
@@ -499,7 +499,6 @@ int is_legacy = 1;
", health %s"
", cache_dir '%s'"
", varlib_dir '%s'"
- ", health_log '%s'"
", alarms default handler '%s'"
", alarms default recipient '%s'"
, rrdhost_hostname(host)
@@ -519,7 +518,6 @@ int is_legacy = 1;
, host->health.health_enabled?"enabled":"disabled"
, host->cache_dir
, host->varlib_dir
- , host->health.health_log_filename
, string2str(host->health.health_default_exec)
, string2str(host->health.health_default_recipient)
);
@@ -1085,7 +1083,7 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
if(!host) return;
if (netdata_exit || force) {
- info("Freeing all memory for host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' freeing memory...", rrdhost_hostname(host));
// ------------------------------------------------------------------------
// first remove it from the indexes, so that it will not be discoverable
@@ -1146,7 +1144,7 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
#endif
if (!netdata_exit && !force) {
- info("Setting archive mode for host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' is now in archive mode...", rrdhost_hostname(host));
rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED | RRDHOST_FLAG_ORPHAN);
return;
}
@@ -1187,7 +1185,6 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) {
rrdpush_destinations_free(host);
string_freez(host->health.health_default_exec);
string_freez(host->health.health_default_recipient);
- freez(host->health.health_log_filename);
string_freez(host->registry_hostname);
simple_pattern_free(host->rrdpush_send_charts_matching);
netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock);
@@ -1236,7 +1233,7 @@ void rrd_finalize_collection_for_all_hosts(void) {
void rrdhost_save_charts(RRDHOST *host) {
if(!host) return;
- info("Saving/Closing database of host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' saving / closing database...", rrdhost_hostname(host));
RRDSET *st;
@@ -1393,13 +1390,11 @@ void reload_host_labels(void) {
rrdhost_flag_set(localhost,RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
- health_label_log_save(localhost);
-
rrdpush_send_host_labels(localhost);
}
void rrdhost_finalize_collection(RRDHOST *host) {
- info("Stopping data collection for host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' stopping data collection...", rrdhost_hostname(host));
RRDSET *st;
rrdset_foreach_write(st, host)
@@ -1413,16 +1408,18 @@ void rrdhost_finalize_collection(RRDHOST *host) {
void rrdhost_delete_charts(RRDHOST *host) {
if(!host) return;
- info("Deleting database of host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' deleting disk files...", rrdhost_hostname(host));
RRDSET *st;
- // we get a write lock
- // to ensure only one thread is saving the database
- rrdset_foreach_write(st, host) {
- rrdset_delete_files(st);
+ if(host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || host->rrd_memory_mode == RRD_MEMORY_MODE_MAP) {
+ // we get a write lock
+ // to ensure only one thread is saving the database
+ rrdset_foreach_write(st, host){
+ rrdset_delete_files(st);
+ }
+ rrdset_foreach_done(st);
}
- rrdset_foreach_done(st);
recursively_delete_dir(host->cache_dir, "left over host");
}
@@ -1433,7 +1430,7 @@ void rrdhost_delete_charts(RRDHOST *host) {
void rrdhost_cleanup_charts(RRDHOST *host) {
if(!host) return;
- info("Cleaning up database of host '%s'...", rrdhost_hostname(host));
+ info("RRD: 'host:%s' cleaning up disk files...", rrdhost_hostname(host));
RRDSET *st;
uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS);
@@ -1460,7 +1457,7 @@ void rrdhost_cleanup_charts(RRDHOST *host) {
// RRDHOST - save all hosts to disk
void rrdhost_save_all(void) {
- info("Saving database [%zu hosts(s)]...", rrdhost_hosts_available());
+ info("RRD: saving databases [%zu hosts(s)]...", rrdhost_hosts_available());
rrd_rdlock();
@@ -1475,7 +1472,7 @@ void rrdhost_save_all(void) {
// RRDHOST - save or delete all hosts from disk
void rrdhost_cleanup_all(void) {
- info("Cleaning up database [%zu hosts(s)]...", rrdhost_hosts_available());
+ info("RRD: cleaning up database [%zu hosts(s)]...", rrdhost_hosts_available());
rrd_rdlock();
diff --git a/database/rrdset.c b/database/rrdset.c
index c97ebbb145..a7378dd4d9 100644
--- a/database/rrdset.c
+++ b/database/rrdset.c
@@ -128,7 +128,6 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v
st->module_name = rrd_string_strdupz(ctr->module);
st->priority = ctr->priority;
- st->cache_dir = rrdset_cache_dir(host, chart_full_id);
st->entries = (ctr->memory_mode != RRD_MEMORY_MODE_DBENGINE) ? align_entries_to_pagesize(ctr->memory_mode, ctr->history_entries) : 5;
st->update_every = ctr->update_every;
st->rrd_memory_mode = ctr->memory_mode;
@@ -601,13 +600,15 @@ void rrdset_get_retention_of_tier_for_collected_chart(RRDSET *st, time_t *first_
if(unlikely(!db_last_entry_s)) {
db_last_entry_s = rrdset_last_entry_s_of_tier(st, tier);
- if (unlikely(!db_last_entry_s))
+ if (unlikely(!db_last_entry_s)) {
// we assume this is a collected RRDSET
- db_last_entry_s = now_s;
+ db_first_entry_s = 0;
+ db_last_entry_s = 0;
+ }
}
if(unlikely(db_last_entry_s > now_s)) {
- internal_error(true,
+ internal_error(db_last_entry_s > now_s + 1,
"RRDSET: 'host:%s/chart:%s' latest db time %ld is in the future, adjusting it to now %ld",
rrdhost_hostname(st->rrdhost), rrdset_id(st),
db_last_entry_s, now_s);
@@ -831,7 +832,8 @@ void rrdset_delete_files(RRDSET *st) {
}
rrddim_foreach_done(rd);
- recursively_delete_dir(st->cache_dir, "left-over chart");
+ if(st->cache_dir)
+ recursively_delete_dir(st->cache_dir, "left-over chart");
}
void rrdset_delete_obsolete_dimensions(RRDSET *st) {
@@ -1105,15 +1107,17 @@ static inline time_t tier_next_point_time_s(RRDDIM *rd, struct rrddim_tier *t, t
}
void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut __maybe_unused) {
- if (unlikely(!t->next_point_time_s))
- t->next_point_time_s = tier_next_point_time_s(rd, t, sp.end_time_s);
+ if (unlikely(!t->next_point_end_time_s))
+ t->next_point_end_time_s = tier_next_point_time_s(rd, t, sp.end_time_s);
+
+ if(unlikely(sp.start_time_s >= t->next_point_end_time_s)) {
+ // flush the virtual point, it is done
- if(unlikely(sp.start_time_s > t->next_point_time_s)) {
if (likely(!storage_point_is_unset(t->virtual_point))) {
t->collect_ops->store_metric(
t->db_collection_handle,
- t->next_point_time_s * USEC_PER_SEC,
+ t->next_point_end_time_s * USEC_PER_SEC,
t->virtual_point.sum,
t->virtual_point.min,
t->virtual_point.max,
@@ -1124,7 +1128,7 @@ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAG
else {
t->collect_ops->store_metric(
t->db_collection_handle,
- t->next_point_time_s * USEC_PER_SEC,
+ t->next_point_end_time_s * USEC_PER_SEC,
NAN,
NAN,
NAN,
@@ -1134,7 +1138,7 @@ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAG
rrdset_done_statistics_points_stored_per_tier[tier]++;
t->virtual_point.count = 0; // make the point unset
- t->next_point_time_s = tier_next_point_time_s(rd, t, sp.end_time_s);
+ t->next_point_end_time_s = tier_next_point_time_s(rd, t, sp.end_time_s);
}
// merge the dates into our virtual point
@@ -2073,6 +2077,13 @@ const char *rrdset_cache_filename(RRDSET *st) {
return st_on_file->cache_filename;
}
+const char *rrdset_cache_dir(RRDSET *st) {
+ if(!st->cache_dir)
+ st->cache_dir = rrdhost_cache_dir_for_rrdset_alloc(st->rrdhost, rrdset_id(st));
+
+ return st->cache_dir;
+}
+
void rrdset_memory_file_free(RRDSET *st) {
if(!st->st_on_file) return;
@@ -2103,7 +2114,7 @@ bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mo
return false;
char fullfilename[FILENAME_MAX + 1];
- snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", st->cache_dir);
+ snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", rrdset_cache_dir(st));
unsigned long size = sizeof(struct rrdset_map_save_v019);
struct rrdset_map_save_v019 *st_on_file = (struct rrdset_map_save_v019 *)netdata_mmap(
diff --git a/database/sqlite/sqlite_functions.c b/database/sqlite/sqlite_functions.c
index fc32853182..1d03cfc2a5 100644
--- a/database/sqlite/sqlite_functions.c
+++ b/database/sqlite/sqlite_functions.c
@@ -529,116 +529,6 @@ void db_execute(const char *cmd)</