summaryrefslogtreecommitdiffstats
path: root/database/rrd.h
diff options
context:
space:
mode:
authorStelios Fragkakis <52996999+stelfrag@users.noreply.github.com>2022-07-06 14:01:53 +0300
committerGitHub <noreply@github.com>2022-07-06 14:01:53 +0300
commit49234f23de3a32682daff07ca229b6b62f24c090 (patch)
treea81ed628abcf4457737bcc3597b097e8e430497a /database/rrd.h
parent8d5850fd49bf6308cd6cab690cdbba4a35505b39 (diff)
Multi-Tier database backend for long term metrics storage (#13263)
* Tier part 1 * Tier part 2 * Tier part 3 * Tier part 4 * Tier part 5 * Fix some ML compilation errors * fix more conflicts * pass proper tier * move metric_uuid from state to RRDDIM * move aclk_live_status from state to RRDDIM * move ml_dimension from state to RRDDIM * abstracted the data collection interface * support flushing for mem db too * abstracted the query api * abstracted latest/oldest time per metric * cleanup * store_metric for tier1 * fix for store_metric * allow multiple tiers, more than 2 * state to tier * Change storage type in db. Query param to request min, max, sum or average * Store tier data correctly * Fix skipping tier page type * Add tier grouping in the tier * Fix to handle archived charts (part 1) * Temp fix for query granularity when requesting tier1 data * Fix parameters in the correct order and calculate the anomaly based on the anomaly count * Proper tiering grouping * Anomaly calculation based on anomaly count * force type checking on storage handles * update cmocka tests * fully dynamic number of storage tiers * fix static allocation * configure grouping for all tiers; disable tiers for unittest; disable statsd configuration for private charts mode * use default page dt using the tiering info * automatic selection of tier * fix for automatic selection of tier * working prototype of dynamic tier selection * automatic selection of tier done right (I hope) * ask for the proper tier value, based on the grouping function * fixes for unittests and load_metric_next() * fixes for lgtm findings * minor renames * add dbengine to page cache size setting * add dbengine to page cache with malloc * query engine optimized to loop as little are required based on the view_update_every * query engine grouping methods now do not assume a constant number of points per group and they allocate memory with OWA * report db points per tier in jsonwrap * query planer that switches database tiers on the fly to satisfy the query for the entire timeframe * dbegnine statistics and documentation (in progress) * calculate average point duration in db * handle single point pages the best we can * handle single point pages even better * Keep page type in the rrdeng_page_descr * updated doc * handle future backwards compatibility - improved statistics * support &tier=X in queries * enfore increasing iterations on tiers * tier 1 is always 1 iteration * backfilling higher tiers on first data collection * reversed anomaly bit * set up to 5 tiers * natural points should only be offered on tier 0, except a specific tier is selected * do not allow more than 65535 points of tier0 to be aggregated on any tier * Work only on actually activated tiers * fix query interpolation * fix query interpolation again * fix lgtm finding * Activate one tier for now * backfilling of higher tiers using raw metrics from lower tiers * fix for crash on start when storage tiers is increased from the default * more statistics on exit * fix bug that prevented higher tiers to get any values; added backfilling options * fixed the statistics log line * removed limit of 255 iterations per tier; moved the code of freezing rd->tiers[x]->db_metric_handle * fixed division by zero on zero points_wanted * removed dead code * Decide on the descr->type for the type of metric * dont store metrics on unknown page types * free db_metric_handle on sql based context queries * Disable STORAGE_POINT value check in the exporting engine unit tests * fix for db modes other than dbengine * fix for aclk archived chart queries destroying db_metric_handles of valid rrddims * fix left-over freez() instead of OWA freez on median queries Co-authored-by: Costa Tsaousis <costa@netdata.cloud> Co-authored-by: Vladimir Kobal <vlad@prokk.net>
Diffstat (limited to 'database/rrd.h')
-rw-r--r--database/rrd.h300
1 files changed, 146 insertions, 154 deletions
diff --git a/database/rrd.h b/database/rrd.h
index 0476b50206..d5acbc4895 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -7,6 +7,11 @@
extern "C" {
#endif
+// non-existing structs instead of voids
+// to enable type checking at compile time
+typedef struct storage_instance STORAGE_INSTANCE;
+typedef struct storage_metric_handle STORAGE_METRIC_HANDLE;
+
// forward typedefs
typedef struct rrdhost RRDHOST;
typedef struct rrddim RRDDIM;
@@ -23,9 +28,10 @@ typedef void *ml_host_t;
typedef void *ml_dimension_t;
// forward declarations
-struct rrddim_volatile;
+struct rrddim_tier;
struct rrdset_volatile;
struct context_param;
+
#ifdef ENABLE_DBENGINE
struct rrdeng_page_descr;
struct rrdengine_instance;
@@ -34,6 +40,7 @@ struct pg_cache_page_index;
#include "daemon/common.h"
#include "web/api/queries/query.h"
+#include "web/api/queries/rrdr.h"
#include "rrdvar.h"
#include "rrdsetvar.h"
#include "rrddimvar.h"
@@ -43,6 +50,17 @@ struct pg_cache_page_index;
#include "aclk/aclk_rrdhost_state.h"
#include "sqlite/sqlite_health.h"
+extern int storage_tiers;
+extern int storage_tiers_grouping_iterations[RRD_STORAGE_TIERS];
+
+typedef enum {
+ RRD_BACKFILL_NONE,
+ RRD_BACKFILL_FULL,
+ RRD_BACKFILL_NEW
+} RRD_BACKFILL;
+
+extern RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS];
+
enum {
CONTEXT_FLAGS_ARCHIVE = 0x01,
CONTEXT_FLAGS_CHART = 0x02,
@@ -226,6 +244,8 @@ struct rrddim {
avl_t avl; // the binary index - this has to be first member!
+ uuid_t metric_uuid; // global UUID for this metric (unique_across hosts)
+
// ------------------------------------------------------------------------
// the dimension definition
@@ -257,7 +277,13 @@ struct rrddim {
// this is actual date time we updated the last_collected_value
// THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET
- struct rrddim_volatile *state; // volatile state that is not persistently stored
+#ifdef ENABLE_ACLK
+ int aclk_live_status;
+#endif
+ ml_dimension_t ml_dimension;
+
+ struct rrddim_tier *tiers[RRD_STORAGE_TIERS]; // our tiers of databases
+
size_t collections_counter; // the number of times we added values to this rrdim
collected_number collected_value_max; // the absolute maximum of the collected value
@@ -326,30 +352,71 @@ struct rrddim_query_handle {
RRDDIM *rd;
time_t start_time;
time_t end_time;
+ TIER_QUERY_FETCH tier_query_fetch_type;
STORAGE_QUERY_HANDLE* handle;
};
+typedef struct storage_point {
+ NETDATA_DOUBLE min; // when count > 1, this is the minimum among them
+ NETDATA_DOUBLE max; // when count > 1, this is the maximum among them
+ NETDATA_DOUBLE sum; // the point sum - divided by count gives the average
+
+ // end_time - start_time = point duration
+ time_t start_time; // the time the point starts
+ time_t end_time; // the time the point ends
+
+ unsigned count; // the number of original points aggregated
+ unsigned anomaly_count; // the number of original points found anomalous
+
+ SN_FLAGS flags; // flags stored with the point
+} STORAGE_POINT;
+
+#define storage_point_unset(x) do { \
+ (x).min = (x).max = (x).sum = NAN; \
+ (x).count = 0; \
+ (x).anomaly_count = 0; \
+ (x).flags = SN_EMPTY_SLOT; \
+ (x).start_time = 0; \
+ (x).end_time = 0; \
+ } while(0)
+
+#define storage_point_empty(x, start_t, end_t) do { \
+ (x).min = (x).max = (x).sum = NAN; \
+ (x).count = 1; \
+ (x).anomaly_count = 0; \
+ (x).flags = SN_EMPTY_SLOT; \
+ (x).start_time = start_t; \
+ (x).end_time = end_t; \
+ } while(0)
+
+#define storage_point_is_unset(x) (!(x).count)
+#define storage_point_is_empty(x) (!netdata_double_isnumber((x).sum))
+
// ------------------------------------------------------------------------
// function pointers that handle data collection
struct rrddim_collect_ops {
// an initialization function to run before starting collection
- void (*init)(RRDDIM *rd);
+ STORAGE_COLLECT_HANDLE *(*init)(STORAGE_METRIC_HANDLE *db_metric_handle);
// run this to store each metric into the database
- void (*store_metric)(RRDDIM *rd, usec_t point_in_time, NETDATA_DOUBLE number, SN_FLAGS flags);
+ void (*store_metric)(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time, NETDATA_DOUBLE number, NETDATA_DOUBLE min_value,
+ NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags);
+
+ // run this to flush / reset the current data collection sequence
+ void (*flush)(STORAGE_COLLECT_HANDLE *collection_handle);
// an finalization function to run after collection is over
// returns 1 if it's safe to delete the dimension
- int (*finalize)(RRDDIM *rd);
+ int (*finalize)(STORAGE_COLLECT_HANDLE *collection_handle);
};
// function pointers that handle database queries
struct rrddim_query_ops {
// run this before starting a series of next_metric() database queries
- void (*init)(RRDDIM *rd, struct rrddim_query_handle *handle, time_t start_time, time_t end_time);
+ void (*init)(STORAGE_METRIC_HANDLE *db_metric_handle, struct rrddim_query_handle *handle, time_t start_time, time_t end_time, TIER_QUERY_FETCH tier_query_fetch_type);
// run this to load each metric number from the database
- NETDATA_DOUBLE (*next_metric)(struct rrddim_query_handle *handle, time_t *current_time, time_t *end_time, SN_FLAGS *flags);
+ STORAGE_POINT (*next_metric)(struct rrddim_query_handle *handle);
// run this to test if the series of next_metric() database queries is finished
int (*is_finished)(struct rrddim_query_handle *handle);
@@ -358,29 +425,31 @@ struct rrddim_query_ops {
void (*finalize)(struct rrddim_query_handle *handle);
// get the timestamp of the last entry of this metric
- time_t (*latest_time)(RRDDIM *rd);
+ time_t (*latest_time)(STORAGE_METRIC_HANDLE *db_metric_handle);
// get the timestamp of the first entry of this metric
- time_t (*oldest_time)(RRDDIM *rd);
+ time_t (*oldest_time)(STORAGE_METRIC_HANDLE *db_metric_handle);
};
+
// ----------------------------------------------------------------------------
-// volatile state per RRD dimension
-struct rrddim_volatile {
-#ifdef ENABLE_DBENGINE
- uuid_t *rrdeng_uuid; // database engine metric UUID
- struct pg_cache_page_index *page_index;
-#endif
-#ifdef ENABLE_ACLK
- int aclk_live_status;
-#endif
- uuid_t metric_uuid; // global UUID for this metric (unique_across hosts)
- STORAGE_COLLECT_HANDLE* handle;
+// Storage tier data for every dimension
+
+struct rrddim_tier {
+ int tier_grouping;
+ RRD_MEMORY_MODE mode; // the memory mode of this tier
+ RRD_BACKFILL backfill; // backfilling configuration
+ STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database
+ STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle
+ STORAGE_POINT virtual_point;
+ time_t next_point_time;
+ usec_t last_collected_ut;
struct rrddim_collect_ops collect_ops;
struct rrddim_query_ops query_ops;
- ml_dimension_t ml_dimension;
};
+extern void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, int tier, time_t now);
+
// ----------------------------------------------------------------------------
// volatile state per chart
struct rrdset_volatile {
@@ -854,9 +923,8 @@ struct rrdhost {
avl_tree_lock rrdfamily_root_index; // the host's chart families index
avl_tree_lock rrdvar_root_index; // the host's chart variables index
-#ifdef ENABLE_DBENGINE
- struct rrdengine_instance *rrdeng_ctx; // DB engine instance for this host
-#endif
+ STORAGE_INSTANCE *storage_instance[RRD_STORAGE_TIERS]; // the database instances of the storage tiers
+
uuid_t host_uuid; // Global GUID for this host
uuid_t *node_id; // Cloud node_id
@@ -900,6 +968,10 @@ extern netdata_rwlock_t rrd_rwlock;
// ----------------------------------------------------------------------------
+extern bool is_storage_engine_shared(STORAGE_INSTANCE *engine);
+
+// ----------------------------------------------------------------------------
+
extern size_t rrd_hosts_available;
extern time_t rrdhost_free_orphan_time;
@@ -1067,28 +1139,49 @@ extern void rrdset_isnot_obsolete(RRDSET *st);
#define rrdset_is_available_for_exporting_and_alarms(st) (!rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && !rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
#define rrdset_is_archived(st) (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED) && (st)->dimensions)
-// get the total duration in seconds of the round robin database
-#define rrdset_duration(st) ((time_t)( (((st)->counter >= ((unsigned long)(st)->entries))?(unsigned long)(st)->entries:(st)->counter) * (st)->update_every ))
-
// get the timestamp of the last entry in the round robin database
-static inline time_t rrdset_last_entry_t_nolock(RRDSET *st)
-{
- if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
- RRDDIM *rd;
- time_t last_entry_t = 0;
+static inline time_t rrddim_last_entry_t(RRDDIM *rd) {
+ time_t latest = rd->tiers[0]->query_ops.latest_time(rd->tiers[0]->db_metric_handle);
- rrddim_foreach_read(rd, st) {
- last_entry_t = MAX(last_entry_t, rd->state->query_ops.latest_time(rd));
- }
+ for(int tier = 1; tier < storage_tiers ;tier++) {
+ if(unlikely(!rd->tiers[tier])) continue;
- return last_entry_t;
- } else {
- return (time_t)st->last_updated.tv_sec;
+ time_t t = rd->tiers[tier]->query_ops.latest_time(rd->tiers[tier]->db_metric_handle);
+ if(t > latest)
+ latest = t;
}
+
+ return latest;
}
-static inline time_t rrdset_last_entry_t(RRDSET *st)
-{
+static inline time_t rrddim_first_entry_t(RRDDIM *rd) {
+ time_t oldest = 0;
+
+ for(int tier = 0; tier < storage_tiers ;tier++) {
+ if(unlikely(!rd->tiers[tier])) continue;
+
+ time_t t = rd->tiers[tier]->query_ops.oldest_time(rd->tiers[tier]->db_metric_handle);
+ if(t != 0 && (oldest == 0 || t < oldest))
+ oldest = t;
+ }
+
+ return oldest;
+}
+
+// get the timestamp of the last entry in the round robin database
+static inline time_t rrdset_last_entry_t_nolock(RRDSET *st) {
+ RRDDIM *rd;
+ time_t last_entry_t = 0;
+
+ rrddim_foreach_read(rd, st) {
+ time_t t = rrddim_last_entry_t(rd);
+ if(t > last_entry_t) last_entry_t = t;
+ }
+
+ return last_entry_t;
+}
+
+static inline time_t rrdset_last_entry_t(RRDSET *st) {
time_t last_entry_t;
netdata_rwlock_rdlock(&st->rrdset_rwlock);
@@ -1099,24 +1192,18 @@ static inline time_t rrdset_last_entry_t(RRDSET *st)
}
// get the timestamp of first entry in the round robin database
-static inline time_t rrdset_first_entry_t_nolock(RRDSET *st)
-{
- if (st->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
- RRDDIM *rd;
- time_t first_entry_t = LONG_MAX;
-
- rrddim_foreach_read(rd, st) {
- first_entry_t =
- MIN(first_entry_t,
- rd->state->query_ops.oldest_time(rd) > st->update_every ?
- rd->state->query_ops.oldest_time(rd) - st->update_every : 0);
- }
-
- if (unlikely(LONG_MAX == first_entry_t)) return 0;
- return first_entry_t;
- } else {
- return (time_t)(rrdset_last_entry_t_nolock(st) - rrdset_duration(st));
+static inline time_t rrdset_first_entry_t_nolock(RRDSET *st) {
+ RRDDIM *rd;
+ time_t first_entry_t = LONG_MAX;
+
+ rrddim_foreach_read(rd, st) {
+ time_t t = rrddim_first_entry_t(rd);
+ if(t < first_entry_t)
+ first_entry_t = t;
}
+
+ if (unlikely(LONG_MAX == first_entry_t)) return 0;
+ return first_entry_t;
}
static inline time_t rrdset_first_entry_t(RRDSET *st)
@@ -1130,105 +1217,8 @@ static inline time_t rrdset_first_entry_t(RRDSET *st)
return first_entry_t;
}
-// get the timestamp of the last entry in the round robin database
-static inline time_t rrddim_last_entry_t(RRDDIM *rd) {
- if (rd->rrdset->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
- return rd->state->query_ops.latest_time(rd);
- return (time_t)rd->rrdset->last_updated.tv_sec;
-}
-
-static inline time_t rrddim_first_entry_t(RRDDIM *rd) {
- if (rd->rrdset->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE)
- return rd->state->query_ops.oldest_time(rd);
- return (time_t)(rd->rrdset->last_updated.tv_sec - rrdset_duration(rd->rrdset));
-}
-
time_t rrdhost_last_entry_t(RRDHOST *h);
-// get the last slot updated in the round robin database
-#define rrdset_last_slot(st) ((size_t)(((st)->current_entry == 0) ? (st)->entries - 1 : (st)->current_entry - 1))
-
-// get the first / oldest slot updated in the round robin database
-// #define rrdset_first_slot(st) ((size_t)( (((st)->counter >= ((unsigned long)(st)->entries)) ? (unsigned long)( ((unsigned long)(st)->current_entry > 0) ? ((unsigned long)(st)->current_entry) : ((unsigned long)(st)->entries) ) - 1 : 0) ))
-
-// return the slot that has the oldest value
-
-static inline size_t rrdset_first_slot(RRDSET *st) {
- if(st->counter >= (size_t)st->entries) {
- // the database has been rotated at least once
- // the oldest entry is the one that will be next
- // overwritten by data collection
- return (size_t)st->current_entry;
- }
-
- // we do not have rotated the db yet
- // so 0 is the first entry
- return 0;
-}
-
-// get the slot of the round robin database, for the given timestamp (t)
-// it always returns a valid slot, although may not be for the time requested if the time is outside the round robin database
-// only valid when not using dbengine
-static inline size_t rrdset_time2slot(RRDSET *st, time_t t) {
- size_t ret = 0;
- time_t last_entry_t = rrdset_last_entry_t_nolock(st);
- time_t first_entry_t = rrdset_first_entry_t_nolock(st);
-
- if(t >= last_entry_t) {
- // the requested time is after the last entry we have
- ret = rrdset_last_slot(st);
- }
- else {
- if(t <= first_entry_t) {
- // the requested time is before the first entry we have
- ret = rrdset_first_slot(st);
- }
- else {
- if(rrdset_last_slot(st) >= (size_t)((last_entry_t - t) / st->update_every))
- ret = rrdset_last_slot(st) - ((last_entry_t - t) / st->update_every);
- else
- ret = rrdset_last_slot(st) - ((last_entry_t - t) / st->update_every) + st->entries;
- }
- }
-
- if(unlikely(ret >= (size_t)st->entries)) {
- error("INTERNAL ERROR: rrdset_time2slot() on %s returns values outside entries", st->name);
- ret = (size_t)(st->entries - 1);
- }
-
- return ret;
-}
-
-// get the timestamp of a specific slot in the round robin database
-// only valid when not using dbengine
-static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) {
- time_t ret;
- time_t last_entry_t = rrdset_last_entry_t_nolock(st);
- time_t first_entry_t = rrdset_first_entry_t_nolock(st);
-
- if(slot >= (size_t)st->entries) {
- error("INTERNAL ERROR: caller of rrdset_slot2time() gives invalid slot %zu", slot);
- slot = (size_t)st->entries - 1;
- }
-
- if(slot > rrdset_last_slot(st))
- ret = last_entry_t - (time_t)(st->update_every * (rrdset_last_slot(st) - slot + (size_t)st->entries));
- else
- ret = last_entry_t - (time_t)(st->update_every * (rrdset_last_slot(st) - slot));
-
- if(unlikely(ret < first_entry_t)) {
- error("INTERNAL ERROR: rrdset_slot2time() on %s returns time too far in the past", st->name);
- ret = first_entry_t;
- }
-
- if(unlikely(ret > last_entry_t)) {
- error("INTERNAL ERROR: rrdset_slot2time() on %s returns time into the future", st->name);
- ret = last_entry_t;
- }
-
- return ret;
-}
-
// ----------------------------------------------------------------------------
// RRD DIMENSION functions
@@ -1318,6 +1308,8 @@ extern void set_host_properties(
const char *guid, const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset,
const char *program_name, const char *program_version);
+extern int get_tier_grouping(int tier);
+
// ----------------------------------------------------------------------------
// RRD DB engine declarations