summaryrefslogtreecommitdiffstats
path: root/libnetdata
diff options
context:
space:
mode:
authorStelios Fragkakis <52996999+stelfrag@users.noreply.github.com>2022-07-06 14:01:53 +0300
committerGitHub <noreply@github.com>2022-07-06 14:01:53 +0300
commit49234f23de3a32682daff07ca229b6b62f24c090 (patch)
treea81ed628abcf4457737bcc3597b097e8e430497a /libnetdata
parent8d5850fd49bf6308cd6cab690cdbba4a35505b39 (diff)
Multi-Tier database backend for long term metrics storage (#13263)
* Tier part 1 * Tier part 2 * Tier part 3 * Tier part 4 * Tier part 5 * Fix some ML compilation errors * fix more conflicts * pass proper tier * move metric_uuid from state to RRDDIM * move aclk_live_status from state to RRDDIM * move ml_dimension from state to RRDDIM * abstracted the data collection interface * support flushing for mem db too * abstracted the query api * abstracted latest/oldest time per metric * cleanup * store_metric for tier1 * fix for store_metric * allow multiple tiers, more than 2 * state to tier * Change storage type in db. Query param to request min, max, sum or average * Store tier data correctly * Fix skipping tier page type * Add tier grouping in the tier * Fix to handle archived charts (part 1) * Temp fix for query granularity when requesting tier1 data * Fix parameters in the correct order and calculate the anomaly based on the anomaly count * Proper tiering grouping * Anomaly calculation based on anomaly count * force type checking on storage handles * update cmocka tests * fully dynamic number of storage tiers * fix static allocation * configure grouping for all tiers; disable tiers for unittest; disable statsd configuration for private charts mode * use default page dt using the tiering info * automatic selection of tier * fix for automatic selection of tier * working prototype of dynamic tier selection * automatic selection of tier done right (I hope) * ask for the proper tier value, based on the grouping function * fixes for unittests and load_metric_next() * fixes for lgtm findings * minor renames * add dbengine to page cache size setting * add dbengine to page cache with malloc * query engine optimized to loop as little are required based on the view_update_every * query engine grouping methods now do not assume a constant number of points per group and they allocate memory with OWA * report db points per tier in jsonwrap * query planer that switches database tiers on the fly to satisfy the query for the entire timeframe * dbegnine statistics and documentation (in progress) * calculate average point duration in db * handle single point pages the best we can * handle single point pages even better * Keep page type in the rrdeng_page_descr * updated doc * handle future backwards compatibility - improved statistics * support &tier=X in queries * enfore increasing iterations on tiers * tier 1 is always 1 iteration * backfilling higher tiers on first data collection * reversed anomaly bit * set up to 5 tiers * natural points should only be offered on tier 0, except a specific tier is selected * do not allow more than 65535 points of tier0 to be aggregated on any tier * Work only on actually activated tiers * fix query interpolation * fix query interpolation again * fix lgtm finding * Activate one tier for now * backfilling of higher tiers using raw metrics from lower tiers * fix for crash on start when storage tiers is increased from the default * more statistics on exit * fix bug that prevented higher tiers to get any values; added backfilling options * fixed the statistics log line * removed limit of 255 iterations per tier; moved the code of freezing rd->tiers[x]->db_metric_handle * fixed division by zero on zero points_wanted * removed dead code * Decide on the descr->type for the type of metric * dont store metrics on unknown page types * free db_metric_handle on sql based context queries * Disable STORAGE_POINT value check in the exporting engine unit tests * fix for db modes other than dbengine * fix for aclk archived chart queries destroying db_metric_handles of valid rrddims * fix left-over freez() instead of OWA freez on median queries Co-authored-by: Costa Tsaousis <costa@netdata.cloud> Co-authored-by: Vladimir Kobal <vlad@prokk.net>
Diffstat (limited to 'libnetdata')
-rw-r--r--libnetdata/libnetdata.h2
-rw-r--r--libnetdata/log/log.h2
-rw-r--r--libnetdata/onewayalloc/onewayalloc.c8
-rw-r--r--libnetdata/onewayalloc/onewayalloc.h2
-rw-r--r--libnetdata/storage_number/storage_number.h9
5 files changed, 22 insertions, 1 deletions
diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h
index 34062f2a65..8f4178330e 100644
--- a/libnetdata/libnetdata.h
+++ b/libnetdata/libnetdata.h
@@ -351,6 +351,8 @@ extern char *netdata_configured_host_prefix;
// BEWARE: Outside of the C code this also exists in alarm-notify.sh
#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud"
+#define RRD_STORAGE_TIERS 5
+
# ifdef __cplusplus
}
# endif
diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h
index 9a69a8737b..ea2be277c1 100644
--- a/libnetdata/log/log.h
+++ b/libnetdata/log/log.h
@@ -82,7 +82,7 @@ static inline void debug_dummy(void) {}
#ifdef NETDATA_INTERNAL_CHECKS
#define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
-#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("INTERNAL ERROR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
+#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#else
#define debug(type, args...) debug_dummy()
#define internal_error(args...) debug_dummy()
diff --git a/libnetdata/onewayalloc/onewayalloc.c b/libnetdata/onewayalloc/onewayalloc.c
index 6023eff983..8f980f70f2 100644
--- a/libnetdata/onewayalloc/onewayalloc.c
+++ b/libnetdata/onewayalloc/onewayalloc.c
@@ -166,6 +166,14 @@ void onewayalloc_freez(ONEWAYALLOC *owa __maybe_unused, const void *ptr __maybe_
return;
}
+void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize) {
+ size_t newsize = oldsize * 2;
+ void *dst = onewayalloc_mallocz(owa, newsize);
+ memcpy(dst, src, oldsize);
+ onewayalloc_freez(owa, src);
+ return dst;
+}
+
void onewayalloc_destroy(ONEWAYALLOC *owa) {
if(!owa) return;
diff --git a/libnetdata/onewayalloc/onewayalloc.h b/libnetdata/onewayalloc/onewayalloc.h
index ed8f12f39b..9eb908bfb6 100644
--- a/libnetdata/onewayalloc/onewayalloc.h
+++ b/libnetdata/onewayalloc/onewayalloc.h
@@ -14,4 +14,6 @@ extern char *onewayalloc_strdupz(ONEWAYALLOC *owa, const char *s);
extern void *onewayalloc_memdupz(ONEWAYALLOC *owa, const void *src, size_t size);
extern void onewayalloc_freez(ONEWAYALLOC *owa, const void *ptr);
+extern void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize);
+
#endif // ONEWAYALLOC_H
diff --git a/libnetdata/storage_number/storage_number.h b/libnetdata/storage_number/storage_number.h
index 722d35d386..be775f0be7 100644
--- a/libnetdata/storage_number/storage_number.h
+++ b/libnetdata/storage_number/storage_number.h
@@ -63,6 +63,15 @@ typedef long long collected_number;
#endif
typedef uint32_t storage_number;
+
+typedef struct storage_number_tier1 {
+ float sum_value;
+ float min_value;
+ float max_value;
+ uint16_t count;
+ uint16_t anomaly_count;
+} storage_number_tier1_t;
+
#define STORAGE_NUMBER_FORMAT "%u"
typedef enum {