diff options
author | vkalintiris <vasilis@netdata.cloud> | 2023-01-04 14:51:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-04 14:51:25 +0200 |
commit | 78359cd375d0b2c285741e6f934a681d0a0c3c15 (patch) | |
tree | 2d5264325510b663d9e87ca62a38fad187e3a713 /database | |
parent | df379e45fbaddf825f1f7972a75ae3f3daf80097 (diff) |
Refactor ML code and add support for multiple KMeans models (#14198)
* Add profile.plugin
Creates the specified number of charts/dimensions, and supports
backfilling with pseudo-historical data.
* Bump
* Remove wrongly merged line.
* Use the number of models specified from the config section.
* Add option to consult all ML models.
* Remove profiling option consuming all models.
* Add underscore after chart name prefix.
* prediction -> dimensions chart
* reorder funcs
* Split charts across types with correct priority
* Ignore training request when chart is under replication.
* Track global number of models consulted.
* Cleanup config.
* initial readme updates
* fix readme
* readme
* Fix function definition when ML is disabled.
* Add dummy ml_chart_update_{begin,end}
* Remove profile_plugin
* Define chart priorities under collectors/all.h
* s/curr_t/current_time/
* Use libnetdata's lock/thread wrappers.
* Fix autotools & cmake builds.
* Delete ML dimensions & charts.
* Let users of buffer preprocessing to handle memory.
* Add separate API calls to start/stop ML threads.
Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
Diffstat (limited to 'database')
-rw-r--r-- | database/rrd.h | 18 | ||||
-rw-r--r-- | database/rrdcontext.c | 9 | ||||
-rw-r--r-- | database/rrddim.c | 12 | ||||
-rw-r--r-- | database/rrdhost.c | 20 | ||||
-rw-r--r-- | database/rrdset.c | 18 |
5 files changed, 48 insertions, 29 deletions
diff --git a/database/rrd.h b/database/rrd.h index 0796ff9012..f0d66a02ee 100644 --- a/database/rrd.h +++ b/database/rrd.h @@ -30,8 +30,9 @@ typedef struct rrdhost_acquired RRDHOST_ACQUIRED; typedef struct rrdset_acquired RRDSET_ACQUIRED; typedef struct rrddim_acquired RRDDIM_ACQUIRED; -typedef void *ml_host_t; -typedef void *ml_dimension_t; +typedef struct ml_host ml_host_t; +typedef struct ml_chart ml_chart_t; +typedef struct ml_dimension ml_dimension_t; typedef enum { QUERY_SOURCE_UNKNOWN, @@ -296,7 +297,7 @@ struct rrddim { // ------------------------------------------------------------------------ // operational state members - ml_dimension_t ml_dimension; // machine learning data about this dimension + ml_dimension_t *ml_dimension; // machine learning data about this dimension // ------------------------------------------------------------------------ // linking to siblings and parents @@ -595,6 +596,8 @@ struct rrdset { DICTIONARY *rrddimvar_root_index; // dimension variables // we use this dictionary to manage their allocation + ml_chart_t *ml_chart; + // ------------------------------------------------------------------------ // operational state members @@ -1028,7 +1031,7 @@ struct rrdhost { // ------------------------------------------------------------------------ // ML handle - ml_host_t ml_host; + ml_host_t *ml_host; // ------------------------------------------------------------------------ // Support for host-level labels @@ -1301,9 +1304,12 @@ void rrdset_isnot_obsolete(RRDSET *st); time_t rrddim_first_entry_t(RRDDIM *rd); time_t rrddim_first_entry_t_of_tier(RRDDIM *rd, size_t tier); time_t rrddim_last_entry_t(RRDDIM *rd); -time_t rrdset_last_entry_t(RRDSET *st); -time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier); +time_t rrddim_last_entry_t_of_tier(RRDDIM *rd, size_t tier); + time_t rrdset_first_entry_t(RRDSET *st); +time_t rrdset_first_entry_t_of_tier(RRDSET *st, size_t tier); +time_t rrdset_last_entry_t(RRDSET *st); + time_t rrdhost_last_entry_t(RRDHOST *h); // ---------------------------------------------------------------------------- diff --git a/database/rrdcontext.c b/database/rrdcontext.c index 3413d1ea82..2df42ef47b 100644 --- a/database/rrdcontext.c +++ b/database/rrdcontext.c @@ -750,11 +750,6 @@ static void rrdinstance_free(RRDINSTANCE *ri) { } static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *rrdcontext) { - static STRING *ml_anomaly_rates_id = NULL; - - if(unlikely(!ml_anomaly_rates_id)) - ml_anomaly_rates_id = string_strdupz(ML_ANOMALY_RATES_CHART_ID); - RRDINSTANCE *ri = value; // link it to its parent @@ -781,10 +776,6 @@ static void rrdinstance_insert_callback(const DICTIONARY_ITEM *item __maybe_unus ri->flags &= ~RRD_FLAG_HIDDEN; // no need of atomics at the constructor } - // we need this when loading from SQL - if(unlikely(ri->id == ml_anomaly_rates_id)) - ri->flags |= RRD_FLAG_HIDDEN; // no need of atomics at the constructor - rrdmetrics_create_in_rrdinstance(ri); // signal the react callback to do the job diff --git a/database/rrddim.c b/database/rrddim.c index 2d909a7015..07f6c5d9d7 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -172,7 +172,7 @@ static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); - ml_new_dimension(rd); + ml_dimension_new(rd); ctr->react_action = RRDDIM_REACT_NEW; @@ -191,7 +191,7 @@ static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v rrdcontext_removed_rrddim(rd); - ml_delete_dimension(rd); + ml_dimension_delete(rd); debug(D_RRD_CALLS, "rrddim_free() %s.%s", rrdset_name(st), rrddim_name(rd)); @@ -420,7 +420,13 @@ inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) // ---------------------------------------------------------------------------- -// get the timestamp of the last entry in the round-robin database +time_t rrddim_last_entry_t_of_tier(RRDDIM *rd, size_t tier) { + if(unlikely(tier > storage_tiers || !rd->tiers[tier])) + return 0; + + return rd->tiers[tier]->query_ops->latest_time(rd->tiers[tier]->db_metric_handle); +} + time_t rrddim_last_entry_t(RRDDIM *rd) { time_t latest = rd->tiers[0]->query_ops->latest_time(rd->tiers[0]->db_metric_handle); diff --git a/database/rrdhost.c b/database/rrdhost.c index 5ba13d47be..cc94df3e3b 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -517,9 +517,10 @@ int is_legacy = 1; rrd_hosts_available++; rrdhost_load_rrdcontext_data(host); - if (!archived) - ml_new_host(host); - else + if (!archived) { + ml_host_new(host); + ml_start_anomaly_detection_threads(host); + } else rrdhost_flag_set(host, RRDHOST_FLAG_ARCHIVED); @@ -629,7 +630,10 @@ void rrdhost_update(RRDHOST *host host->rrdpush_replication_step = rrdpush_replication_step; rrd_hosts_available++; - ml_new_host(host); + + ml_host_new(host); + ml_start_anomaly_detection_threads(host); + rrdhost_load_rrdcontext_data(host); info("Host %s is not in archived mode anymore", rrdhost_hostname(host)); } @@ -1088,10 +1092,6 @@ void rrdhost_free(RRDHOST *host, bool force) { rrd_check_wrlock(); // make sure the RRDs are write locked - rrdhost_wrlock(host); - ml_delete_host(host); - rrdhost_unlock(host); - // ------------------------------------------------------------------------ // clean up streaming @@ -1126,6 +1126,10 @@ void rrdhost_free(RRDHOST *host, bool force) { rrdcalc_rrdhost_index_destroy(host); rrdcalctemplate_index_destroy(host); + // cleanup ML resources + ml_stop_anomaly_detection_threads(host); + ml_host_delete(host); + freez(host->exporting_flags); health_alarm_log_free(host); diff --git a/database/rrdset.c b/database/rrdset.c index 6eb3c7105f..8a1cd6d90b 100644 --- a/database/rrdset.c +++ b/database/rrdset.c @@ -178,6 +178,8 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v st->red = NAN; ctr->react_action = RRDSET_REACT_NEW; + + ml_chart_new(st); } // the destructor - the dictionary is write locked while this runs @@ -232,6 +234,9 @@ static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v // 7. destroy the chart labels rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know + // 8. destroy the ml handle + ml_chart_delete(st); + rrdset_memory_file_free(st); // remove files of db mode save and map // ------------------------------------------------------------------------ @@ -1253,6 +1258,8 @@ static inline size_t rrdset_done_interpolate( last_ut = next_store_ut; + ml_chart_update_begin(st); + struct rda_item *rda; size_t dim_id; for(dim_id = 0, rda = rda_base ; dim_id < rda_slots ; ++dim_id, ++rda) { @@ -1332,8 +1339,11 @@ static inline size_t rrdset_done_interpolate( break; } + time_t current_time = (time_t) (next_store_ut / USEC_PER_SEC); + if(unlikely(!store_this_entry)) { - (void) ml_is_anomalous(rd, 0, false); + (void) ml_is_anomalous(rd, current_time, 0, false); + rrddim_store_metric(rd, next_store_ut, NAN, SN_FLAG_NONE); rrdcontext_collected_rrddim(rd); continue; @@ -1342,7 +1352,7 @@ static inline size_t rrdset_done_interpolate( if(likely(rd->updated && rd->collections_counter > 1 && iterations < st->gap_when_lost_iterations_above)) { uint32_t dim_storage_flags = storage_flags; - if (ml_is_anomalous(rd, new_value, true)) { + if (ml_is_anomalous(rd, current_time, new_value, true)) { // clear anomaly bit: 0 -> is anomalous, 1 -> not anomalous dim_storage_flags &= ~((storage_number)SN_FLAG_NOT_ANOMALOUS); } @@ -1352,7 +1362,7 @@ static inline size_t rrdset_done_interpolate( rd->last_stored_value = new_value; } else { - (void) ml_is_anomalous(rd, 0, false); + (void) ml_is_anomalous(rd, current_time, 0, false); rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING ", rrddim_name(rd), current_entry); @@ -1364,6 +1374,8 @@ static inline size_t rrdset_done_interpolate( stored_entries++; } + ml_chart_update_end(st); + // reset the storage flags for the next point, if any; storage_flags = SN_DEFAULT_FLAGS; |