summaryrefslogtreecommitdiffstats
path: root/database
diff options
context:
space:
mode:
authorvkalintiris <vasilis@netdata.cloud>2022-12-21 15:03:05 +0200
committerGitHub <noreply@github.com>2022-12-21 15:03:05 +0200
commit689dc6b7fbbf495ce3e020dcff0d014a8d338c52 (patch)
tree1f458a3218798b53809d0868a82fcad1a20b1e32 /database
parentfe386aad57f24574783f4c68bab433a5cdfe6f64 (diff)
Refactor ML code and add support for multiple KMeans models. (#14065)
* Add profile.plugin Creates the specified number of charts/dimensions, and supports backfilling with pseudo-historical data. * Bump * Remove wrongly merged line. * Use the number of models specified from the config section. * Add option to consult all ML models. * Remove profiling option consuming all models. * Add underscore after chart name prefix. * prediction -> dimensions chart * reorder funcs * Split charts across types with correct priority * Ignore training request when chart is under replication. * Track global number of models consulted. * Cleanup config. * initial readme updates * fix readme * readme * Fix function definition when ML is disabled. * Add dummy ml_chart_update_{begin,end} * Remove profile_plugin * Define chart priorities under collectors/all.h * s/curr_t/current_time/ Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
Diffstat (limited to 'database')
-rw-r--r--database/rrd.h18
-rw-r--r--database/rrdcontext.c9
-rw-r--r--database/rrddim.c12
-rw-r--r--database/rrdhost.c6
-rw-r--r--database/rrdset.c18
5 files changed, 39 insertions, 24 deletions
diff --git a/database/rrd.h b/database/rrd.h
index 0796ff9012..f0d66a02ee 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -30,8 +30,9 @@ typedef struct rrdhost_acquired RRDHOST_ACQUIRED;
typedef struct rrdset_acquired RRDSET_ACQUIRED;
typedef struct rrddim_acquired RRDDIM_ACQUIRED;
-typedef void *ml_host_t;
-typedef void *ml_dimension_t;
+typedef struct ml_host ml_host_t;
+typedef struct ml_chart ml_chart_t;
+typedef struct ml_dimension ml_dimension_t;
typedef enum {
QUERY_SOURCE_UNKNOWN,
@@ -296,7 +297,7 @@ struct rrddim {
// ------------------------------------------------------------------------
// operational state members
- ml_dimension_t ml_dimension; // machine learning data about this dimension
+ ml_dimension_t *ml_dimension; // machine learning data about this dimension
// ------------------------------------------------------------------------
// linking to siblings and parents
@@ -595,6 +596,8 @@ struct rrdset {
DICTIONARY *rrddimvar_root_index; // dimension variables
// we use this dictionary to manage their allocation
+ ml_chart_t *ml_chart;
+
// ------------------------------------------------------------------------
// operational state members
@@ -1028,7 +1031,7 @@ struct rrdhost {
// ------------------------------------------------------------------------
// ML handle
- ml_host_t ml_host;
+ ml_host_t *ml_host;
// ------------------------------------------------------------------------
// Support for host-level labels
@@ -1301,9 +1304,12 @@ void rrdset_isnot_obsolete(RRDSET *st);
time_t rrddim_first_entry_t(RRDDIM *rd);
time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier);
time_t rrddim_last_entry_t(RRDDIM *rd);
-time_t rrdset_last_entry_t(RRDSET *st);
-time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier);
+time_t rrddim_last_entry_t_of_tier(RRDDIM *rd, size_t tier);
+
time_t rrdset_first_entry_t(RRDSET *st);
+time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier);
+time_t rrdset_last_entry_t(RRDSET *st);
+
time_t rrdhost_last_entry_t(RRDHOST *h);
// ----------------------------------------------------------------------------
diff --git a/database/rrdcontext.c b/database/rrdcontext.c
index 3413d1ea82..2df42ef47b 100644
--- a/database/rrdcontext.c
+++ b/database/rrdcontext.c
@@ -750,11 +750,6 @@ static void rrdinstance_free(RRDINSTANCE *ri) {
}
static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext) {
- static STRING *ml_anomaly_rates_id = NULL;
-
- if(unlikely(!ml_anomaly_rates_id))
- ml_anomaly_rates_id = string_strdupz(ML_ANOMALY_RATES_CHART_ID);
-
RRDINSTANCE *ri = value;
// link it to its parent
@@ -781,10 +776,6 @@ static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unus
ri->flags &= ~RRD_FLAG_HIDDEN; // no need of atomics at the constructor
}
- // we need this when loading from SQL
- if(unlikely(ri->id == ml_anomaly_rates_id))
- ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor
-
rrdmetrics_create_in_rrdinstance(ri);
// signal the react callback to do the job
diff --git a/database/rrddim.c b/database/rrddim.c
index 2d909a7015..07f6c5d9d7 100644
--- a/database/rrddim.c
+++ b/database/rrddim.c
@@ -172,7 +172,7 @@ static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v
rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK);
rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED);
- ml_new_dimension(rd);
+ ml_dimension_new(rd);
ctr->react_action = RRDDIM_REACT_NEW;
@@ -191,7 +191,7 @@ static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v
rrdcontext_removed_rrddim(rd);
- ml_delete_dimension(rd);
+ ml_dimension_delete(rd);
debug(D_RRD_CALLS, "rrddim_free() %s.%s", rrdset_name(st), rrddim_name(rd));
@@ -420,7 +420,13 @@ inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor)
// ----------------------------------------------------------------------------
-// get the timestamp of the last entry in the round-robin database
+time_t rrddim_last_entry_t_of_tier(RRDDIM *rd, size_t tier) {
+ if(unlikely(tier > storage_tiers || !rd->tiers[tier]))
+ return 0;
+
+ return rd->tiers[tier]->query_ops->latest_time(rd->tiers[tier]->db_metric_handle);
+}
+
time_t rrddim_last_entry_t(RRDDIM *rd) {
time_t latest = rd->tiers[0]->query_ops->latest_time(rd->tiers[0]->db_metric_handle);
diff --git a/database/rrdhost.c b/database/rrdhost.c
index 5ba13d47be..4982683078 100644
--- a/database/rrdhost.c
+++ b/database/rrdhost.c
@@ -518,7 +518,7 @@ int is_legacy = 1;
rrdhost_load_rrdcontext_data(host);
if (!archived)
- ml_new_host(host);
+ ml_host_new(host);
else
rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED);
@@ -629,7 +629,7 @@ void rrdhost_update(RRDHOST *host
host->rrdpush_replication_step = rrdpush_replication_step;
rrd_hosts_available++;
- ml_new_host(host);
+ ml_host_new(host);
rrdhost_load_rrdcontext_data(host);
info("Host %s is not in archived mode anymore", rrdhost_hostname(host));
}
@@ -1089,7 +1089,7 @@ void rrdhost_free(RRDHOST *host, bool force) {
rrd_check_wrlock(); // make sure the RRDs are write locked
rrdhost_wrlock(host);
- ml_delete_host(host);
+ ml_host_delete(host);
rrdhost_unlock(host);
// ------------------------------------------------------------------------
diff --git a/database/rrdset.c b/database/rrdset.c
index 6eb3c7105f..8a1cd6d90b 100644
--- a/database/rrdset.c
+++ b/database/rrdset.c
@@ -178,6 +178,8 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v
st->red = NAN;
ctr->react_action = RRDSET_REACT_NEW;
+
+ ml_chart_new(st);
}
// the destructor - the dictionary is write locked while this runs
@@ -232,6 +234,9 @@ static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v
// 7. destroy the chart labels
rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know
+ // 8. destroy the ml handle
+ ml_chart_delete(st);
+
rrdset_memory_file_free(st); // remove files of db mode save and map
// ------------------------------------------------------------------------
@@ -1253,6 +1258,8 @@ static inline size_t rrdset_done_interpolate(
last_ut = next_store_ut;
+ ml_chart_update_begin(st);
+
struct rda_item *rda;
size_t dim_id;
for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) {
@@ -1332,8 +1339,11 @@ static inline size_t rrdset_done_interpolate(
break;
}
+ time_t current_time = (time_t) (next_store_ut / USEC_PER_SEC);
+
if(unlikely(!store_this_entry)) {
- (void) ml_is_anomalous(rd, 0, false);
+ (void) ml_is_anomalous(rd, current_time, 0, false);
+
rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE);
rrdcontext_collected_rrddim(rd);
continue;
@@ -1342,7 +1352,7 @@ static inline size_t rrdset_done_interpolate(
if(likely(rd->updated && rd->collections_counter > 1 && iterations < st->gap_when_lost_iterations_above)) {
uint32_t dim_storage_flags = storage_flags;
- if (ml_is_anomalous(rd, new_value, true)) {
+ if (ml_is_anomalous(rd, current_time, new_value, true)) {
// clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous
dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS);
}
@@ -1352,7 +1362,7 @@ static inline size_t rrdset_done_interpolate(
rd->last_stored_value = new_value;
}
else {
- (void) ml_is_anomalous(rd, 0, false);
+ (void) ml_is_anomalous(rd, current_time, 0, false);
rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry);
@@ -1364,6 +1374,8 @@ static inline size_t rrdset_done_interpolate(
stored_entries++;
}
+ ml_chart_update_end(st);
+
// reset the storage flags for the next point, if any;
storage_flags = SN_DEFAULT_FLAGS;