diff options
author | vkalintiris <vasilis@netdata.cloud> | 2023-01-04 14:51:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-04 14:51:25 +0200 |
commit | 78359cd375d0b2c285741e6f934a681d0a0c3c15 (patch) | |
tree | 2d5264325510b663d9e87ca62a38fad187e3a713 /ml | |
parent | df379e45fbaddf825f1f7972a75ae3f3daf80097 (diff) |
Refactor ML code and add support for multiple KMeans models (#14198)
* Add profile.plugin
Creates the specified number of charts/dimensions, and supports
backfilling with pseudo-historical data.
* Bump
* Remove wrongly merged line.
* Use the number of models specified from the config section.
* Add option to consult all ML models.
* Remove profiling option consuming all models.
* Add underscore after chart name prefix.
* prediction -> dimensions chart
* reorder funcs
* Split charts across types with correct priority
* Ignore training request when chart is under replication.
* Track global number of models consulted.
* Cleanup config.
* initial readme updates
* fix readme
* readme
* Fix function definition when ML is disabled.
* Add dummy ml_chart_update_{begin,end}
* Remove profile_plugin
* Define chart priorities under collectors/all.h
* s/curr_t/current_time/
* Use libnetdata's lock/thread wrappers.
* Fix autotools & cmake builds.
* Delete ML dimensions & charts.
* Let users of buffer preprocessing handle memory.
* Add separate API calls to start/stop ML threads.
Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
Diffstat (limited to 'ml')
-rw-r--r-- | ml/ADCharts.cc | 496 | ||||
-rw-r--r-- | ml/ADCharts.h | 10 | ||||
-rw-r--r-- | ml/Chart.cc | 0 | ||||
-rw-r--r-- | ml/Chart.h | 128 | ||||
-rw-r--r-- | ml/Config.cc | 6 | ||||
-rw-r--r-- | ml/Config.h | 1 | ||||
-rw-r--r-- | ml/Dimension.cc | 309 | ||||
-rw-r--r-- | ml/Dimension.h | 178 | ||||
-rw-r--r-- | ml/Host.cc | 393 | ||||
-rw-r--r-- | ml/Host.h | 97 | ||||
-rw-r--r-- | ml/Mutex.h | 36 | ||||
-rw-r--r-- | ml/Query.h | 2 | ||||
-rw-r--r-- | ml/Queue.h | 59 | ||||
-rw-r--r-- | ml/README.md | 76 | ||||
-rw-r--r-- | ml/SamplesBuffer.cc | 52 | ||||
-rw-r--r-- | ml/SamplesBuffer.h | 3 | ||||
-rw-r--r-- | ml/SamplesBufferTests.cc | 146 | ||||
-rw-r--r-- | ml/Stats.h | 46 | ||||
-rw-r--r-- | ml/ml-dummy.c | 51 | ||||
-rw-r--r-- | ml/ml-private.h | 13 | ||||
-rw-r--r-- | ml/ml.cc | 108 | ||||
-rw-r--r-- | ml/ml.h | 28 |
22 files changed, 1495 insertions, 743 deletions
diff --git a/ml/ADCharts.cc b/ml/ADCharts.cc index 00c593c0c4..49816f8f4b 100644 --- a/ml/ADCharts.cc +++ b/ml/ADCharts.cc @@ -3,55 +3,182 @@ #include "ADCharts.h" #include "Config.h" -void ml::updateDimensionsChart(RRDHOST *RH, - collected_number NumTrainedDimensions, - collected_number NumNormalDimensions, - collected_number NumAnomalousDimensions) { - static thread_local RRDSET *RS = nullptr; - static thread_local RRDDIM *NumTotalDimensionsRD = nullptr; - static thread_local RRDDIM *NumTrainedDimensionsRD = nullptr; - static thread_local RRDDIM *NumNormalDimensionsRD = nullptr; - static thread_local RRDDIM *NumAnomalousDimensionsRD = nullptr; - - if (!RS) { - std::stringstream IdSS, NameSS; +void ml::updateDimensionsChart(RRDHOST *RH, const MachineLearningStats &MLS) { + /* + * Machine learning status + */ + { + static thread_local RRDSET *MachineLearningStatusRS = nullptr; + + static thread_local RRDDIM *Enabled = nullptr; + static thread_local RRDDIM *DisabledUE = nullptr; + static thread_local RRDDIM *DisabledSP = nullptr; + + if (!MachineLearningStatusRS) { + std::stringstream IdSS, NameSS; + + IdSS << "machine_learning_status_for_" << localhost->machine_guid; + NameSS << "machine_learning_status_for_" << localhost->hostname; + + MachineLearningStatusRS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.machine_learning_status", // ctx + "Machine learning status", // title + "dimensions", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE // chart_type + ); + rrdset_flag_set(MachineLearningStatusRS , RRDSET_FLAG_ANOMALY_DETECTION); + + Enabled = rrddim_add(MachineLearningStatusRS, "enabled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + DisabledUE = rrddim_add(MachineLearningStatusRS, "disabled-ue", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + DisabledSP = 
rrddim_add(MachineLearningStatusRS, "disabled-sp", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(MachineLearningStatusRS, Enabled, MLS.NumMachineLearningStatusEnabled); + rrddim_set_by_pointer(MachineLearningStatusRS, DisabledUE, MLS.NumMachineLearningStatusDisabledUE); + rrddim_set_by_pointer(MachineLearningStatusRS, DisabledSP, MLS.NumMachineLearningStatusDisabledSP); + + rrdset_done(MachineLearningStatusRS); + } - IdSS << "dimensions_on_" << localhost->machine_guid; - NameSS << "dimensions_on_" << localhost->hostname; + /* + * Metric type + */ + { + static thread_local RRDSET *MetricTypesRS = nullptr; + + static thread_local RRDDIM *Constant = nullptr; + static thread_local RRDDIM *Variable = nullptr; + + if (!MetricTypesRS) { + std::stringstream IdSS, NameSS; + + IdSS << "metric_types_for_" << localhost->machine_guid; + NameSS << "metric_types_for_" << localhost->hostname; + + MetricTypesRS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.metric_types", // ctx + "Dimensions by metric type", // title + "dimensions", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_METRIC_TYPES, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE // chart_type + ); + rrdset_flag_set(MetricTypesRS, RRDSET_FLAG_ANOMALY_DETECTION); + + Constant = rrddim_add(MetricTypesRS, "constant", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + Variable = rrddim_add(MetricTypesRS, "variable", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(MetricTypesRS, Constant, MLS.NumMetricTypeConstant); + rrddim_set_by_pointer(MetricTypesRS, Variable, MLS.NumMetricTypeVariable); + + rrdset_done(MetricTypesRS); + } - RS = rrdset_create( - RH, - "anomaly_detection", // type - IdSS.str().c_str(), // id - NameSS.str().c_str(), // name - "dimensions", // family - "anomaly_detection.dimensions", // ctx - "Anomaly detection dimensions", // title - 
"dimensions", // units - "netdata", // plugin - "ml", // module - 39183, // priority - RH->rrd_update_every, // update_every - RRDSET_TYPE_LINE // chart_type - ); - rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); - - NumTotalDimensionsRD = rrddim_add(RS, "total", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); - NumTrainedDimensionsRD = rrddim_add(RS, "trained", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); - NumNormalDimensionsRD = rrddim_add(RS, "normal", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); - NumAnomalousDimensionsRD = rrddim_add(RS, "anomalous", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); + /* + * Training status + */ + { + static thread_local RRDSET *TrainingStatusRS = nullptr; + + static thread_local RRDDIM *Untrained = nullptr; + static thread_local RRDDIM *PendingWithoutModel = nullptr; + static thread_local RRDDIM *Trained = nullptr; + static thread_local RRDDIM *PendingWithModel = nullptr; + + if (!TrainingStatusRS) { + std::stringstream IdSS, NameSS; + + IdSS << "training_status_for_" << localhost->machine_guid; + NameSS << "training_status_for_" << localhost->hostname; + + TrainingStatusRS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.training_status", // ctx + "Training status of dimensions", // title + "dimensions", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_TRAINING_STATUS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE // chart_type + ); + + rrdset_flag_set(TrainingStatusRS, RRDSET_FLAG_ANOMALY_DETECTION); + + Untrained = rrddim_add(TrainingStatusRS, "untrained", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + PendingWithoutModel = rrddim_add(TrainingStatusRS, "pending-without-model", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + Trained = rrddim_add(TrainingStatusRS, "trained", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + PendingWithModel = rrddim_add(TrainingStatusRS, "pending-with-model", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + 
rrddim_set_by_pointer(TrainingStatusRS, Untrained, MLS.NumTrainingStatusUntrained); + rrddim_set_by_pointer(TrainingStatusRS, PendingWithoutModel, MLS.NumTrainingStatusPendingWithoutModel); + rrddim_set_by_pointer(TrainingStatusRS, Trained, MLS.NumTrainingStatusTrained); + rrddim_set_by_pointer(TrainingStatusRS, PendingWithModel, MLS.NumTrainingStatusPendingWithModel); + + rrdset_done(TrainingStatusRS); } - rrddim_set_by_pointer(RS, NumTotalDimensionsRD, NumNormalDimensions + NumAnomalousDimensions); - rrddim_set_by_pointer(RS, NumTrainedDimensionsRD, NumTrainedDimensions); - rrddim_set_by_pointer(RS, NumNormalDimensionsRD, NumNormalDimensions); - rrddim_set_by_pointer(RS, NumAnomalousDimensionsRD, NumAnomalousDimensions); + /* + * Prediction status + */ + { + static thread_local RRDSET *PredictionRS = nullptr; + + static thread_local RRDDIM *Anomalous = nullptr; + static thread_local RRDDIM *Normal = nullptr; + + if (!PredictionRS) { + std::stringstream IdSS, NameSS; + + IdSS << "dimensions_on_" << localhost->machine_guid; + NameSS << "dimensions_on_" << localhost->hostname; + + PredictionRS = rrdset_create( + RH, + "anomaly_detection", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "dimensions", // family + "anomaly_detection.dimensions", // ctx + "Anomaly detection dimensions", // title + "dimensions", // units + "netdata", // plugin + "ml", // module + ML_CHART_PRIO_DIMENSIONS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE // chart_type + ); + rrdset_flag_set(PredictionRS, RRDSET_FLAG_ANOMALY_DETECTION); + + Anomalous = rrddim_add(PredictionRS, "anomalous", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + Normal = rrddim_add(PredictionRS, "normal", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(PredictionRS, Anomalous, MLS.NumAnomalousDimensions); + rrddim_set_by_pointer(PredictionRS, Normal, MLS.NumNormalDimensions); + + rrdset_done(PredictionRS); + } - rrdset_done(RS); } void 
ml::updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyRate) { @@ -75,7 +202,7 @@ void ml::updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyR "percentage", // units "netdata", // plugin "ml", // module - 39184, // priority + ML_CHART_PRIO_ANOMALY_RATE, // priority RH->rrd_update_every, // update_every RRDSET_TYPE_LINE // chart_type ); @@ -109,7 +236,7 @@ void ml::updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyR "percentage", // units "netdata", // plugin "ml", // module - 39185, // priority + ML_CHART_PRIO_DETECTOR_EVENTS, // priority RH->rrd_update_every, // update_every RRDSET_TYPE_LINE // chart_type ); @@ -143,6 +270,7 @@ void ml::updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyR 0, /* tier */ QUERY_SOURCE_ML ); + if(R) { assert(R->d == 1 && R->n == 1 && R->rows == 1); @@ -157,77 +285,227 @@ void ml::updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyR rrdr_free(OWA, R); } + onewayalloc_destroy(OWA); } -void ml::updateDetectionChart(RRDHOST *RH) { - static thread_local RRDSET *RS = nullptr; - static thread_local RRDDIM *UserRD, *SystemRD = nullptr; - - if (!RS) { - std::stringstream IdSS, NameSS; - - IdSS << "prediction_stats_" << RH->machine_guid; - NameSS << "prediction_stats_for_" << RH->hostname; - - RS = rrdset_create_localhost( - "netdata", // type - IdSS.str().c_str(), // id - NameSS.str().c_str(), // name - "ml", // family - "netdata.prediction_stats", // ctx - "Prediction thread CPU usage", // title - "milliseconds/s", // units - "netdata", // plugin - "ml", // module - 136000, // priority - RH->rrd_update_every, // update_every - RRDSET_TYPE_STACKED // chart_type - ); - - UserRD = rrddim_add(RS, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); - SystemRD = rrddim_add(RS, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); +void ml::updateResourceUsageCharts(RRDHOST *RH, const struct rusage &PredictionRU, const struct rusage &TrainingRU) { + /* + * 
prediction rusage + */ + { + static thread_local RRDSET *RS = nullptr; + + static thread_local RRDDIM *User = nullptr; + static thread_local RRDDIM *System = nullptr; + + if (!RS) { + std::stringstream IdSS, NameSS; + + IdSS << "prediction_usage_for_" << localhost->machine_guid; + NameSS << "prediction_usage_for_" << localhost->hostname; + + RS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.prediction_usage", // ctx + "Prediction resource usage", // title + "milliseconds/s", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_PREDICTION_USAGE, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_STACKED // chart_type + ); + rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); + + User = rrddim_add(RS, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + System = rrddim_add(RS, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(RS, User, PredictionRU.ru_utime.tv_sec * 1000000ULL + PredictionRU.ru_utime.tv_usec); + rrddim_set_by_pointer(RS, System, PredictionRU.ru_stime.tv_sec * 1000000ULL + PredictionRU.ru_stime.tv_usec); + + rrdset_done(RS); } - struct rusage TRU; - getrusage(RUSAGE_THREAD, &TRU); - - rrddim_set_by_pointer(RS, UserRD, TRU.ru_utime.tv_sec * 1000000ULL + TRU.ru_utime.tv_usec); - rrddim_set_by_pointer(RS, SystemRD, TRU.ru_stime.tv_sec * 1000000ULL + TRU.ru_stime.tv_usec); - rrdset_done(RS); + /* + * training rusage + */ + { + static thread_local RRDSET *RS = nullptr; + + static thread_local RRDDIM *User = nullptr; + static thread_local RRDDIM *System = nullptr; + + if (!RS) { + std::stringstream IdSS, NameSS; + + IdSS << "training_usage_for_" << localhost->machine_guid; + NameSS << "training_usage_for_" << localhost->hostname; + + RS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.training_usage", // ctx + 
"Training resource usage", // title + "milliseconds/s", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_TRAINING_USAGE, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_STACKED // chart_type + ); + rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); + + User = rrddim_add(RS, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + System = rrddim_add(RS, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(RS, User, TrainingRU.ru_utime.tv_sec * 1000000ULL + TrainingRU.ru_utime.tv_usec); + rrddim_set_by_pointer(RS, System, TrainingRU.ru_stime.tv_sec * 1000000ULL + TrainingRU.ru_stime.tv_usec); + + rrdset_done(RS); + } } -void ml::updateTrainingChart(RRDHOST *RH, struct rusage *TRU) { - static thread_local RRDSET *RS = nullptr; - static thread_local RRDDIM *UserRD = nullptr; - static thread_local RRDDIM *SystemRD = nullptr; - - if (!RS) { - std::stringstream IdSS, NameSS; - - IdSS << "training_stats_" << RH->machine_guid; - NameSS << "training_stats_for_" << RH->hostname; - - RS = rrdset_create_localhost( - "netdata", // type - IdSS.str().c_str(), // id - NameSS.str().c_str(), // name - "ml", // family - "netdata.training_stats", // ctx - "Training thread CPU usage", // title - "milliseconds/s", // units - "netdata", // plugin - "ml", // module - 136001, // priority - RH->rrd_update_every, // update_every - RRDSET_TYPE_STACKED // chart_type - ); +void ml::updateTrainingStatisticsChart(RRDHOST *RH, const TrainingStats &TS) { + /* + * queue stats + */ + { + static thread_local RRDSET *RS = nullptr; + + static thread_local RRDDIM *QueueSize = nullptr; + static thread_local RRDDIM *PoppedItems = nullptr; + + if (!RS) { + std::stringstream IdSS, NameSS; + + IdSS << "queue_stats_for_" << localhost->machine_guid; + NameSS << "queue_stats_for_" << localhost->hostname; + + RS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + 
"netdata.queue_stats", // ctx + "Training queue stats", // title + "items", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_QUEUE_STATS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE// chart_type + ); + rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); + + QueueSize = rrddim_add(RS, "queue_size", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + PoppedItems = rrddim_add(RS, "popped_items", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(RS, QueueSize, TS.QueueSize); + rrddim_set_by_pointer(RS, PoppedItems, TS.NumPoppedItems); + + rrdset_done(RS); + } - UserRD = rrddim_add(RS, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); - SystemRD = rrddim_add(RS, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + /* + * training stats + */ + { + static thread_local RRDSET *RS = nullptr; + + static thread_local RRDDIM *Allotted = nullptr; + static thread_local RRDDIM *Consumed = nullptr; + static thread_local RRDDIM *Remaining = nullptr; + + if (!RS) { + std::stringstream IdSS, NameSS; + + IdSS << "training_time_stats_for_" << localhost->machine_guid; + NameSS << "training_time_stats_for_" << localhost->hostname; + + RS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.training_time_stats", // ctx + "Training time stats", // title + "milliseconds", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_TRAINING_TIME_STATS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE// chart_type + ); + rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); + + Allotted = rrddim_add(RS, "allotted", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + Consumed = rrddim_add(RS, "consumed", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + Remaining = rrddim_add(RS, "remaining", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(RS, Allotted, TS.AllottedUT); + rrddim_set_by_pointer(RS, Consumed, 
TS.ConsumedUT); + rrddim_set_by_pointer(RS, Remaining, TS.RemainingUT); + + rrdset_done(RS); } - rrddim_set_by_pointer(RS, UserRD, TRU->ru_utime.tv_sec * 1000000ULL + TRU->ru_utime.tv_usec); - rrddim_set_by_pointer(RS, SystemRD, TRU->ru_stime.tv_sec * 1000000ULL + TRU->ru_stime.tv_usec); - rrdset_done(RS); + /* + * training result stats + */ + { + static thread_local RRDSET *RS = nullptr; + + static thread_local RRDDIM *Ok = nullptr; + static thread_local RRDDIM *InvalidQueryTimeRange = nullptr; + static thread_local RRDDIM *NotEnoughCollectedValues = nullptr; + static thread_local RRDDIM *NullAcquiredDimension = nullptr; + static thread_local RRDDIM *ChartUnderReplication = nullptr; + + if (!RS) { + std::stringstream IdSS, NameSS; + + IdSS << "training_results_for_" << localhost->machine_guid; + NameSS << "training_results_for_" << localhost->hostname; + + RS = rrdset_create_localhost( + "netdata", // type + IdSS.str().c_str(), // id + NameSS.str().c_str(), // name + "ml", // family + "netdata.training_results", // ctx + "Training results", // title + "events", // units + "netdata", // plugin + "ml", // module + NETDATA_ML_CHART_PRIO_TRAINING_RESULTS, // priority + RH->rrd_update_every, // update_every + RRDSET_TYPE_LINE// chart_type + ); + rrdset_flag_set(RS, RRDSET_FLAG_ANOMALY_DETECTION); + + Ok = rrddim_add(RS, "ok", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + InvalidQueryTimeRange = rrddim_add(RS, "invalid-queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + NotEnoughCollectedValues = rrddim_add(RS, "not-enough-values", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + NullAcquiredDimension = rrddim_add(RS, "null-acquired-dimensions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ChartUnderReplication = rrddim_add(RS, "chart-under-replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(RS, Ok, TS.TrainingResultOk); + rrddim_set_by_pointer(RS, InvalidQueryTimeRange, TS.TrainingResultInvalidQueryTimeRange); + rrddim_set_by_pointer(RS, NotEnoughCollectedValues, 
TS.TrainingResultNotEnoughCollectedValues); + rrddim_set_by_pointer(RS, NullAcquiredDimension, TS.TrainingResultNullAcquiredDimension); + rrddim_set_by_pointer(RS, ChartUnderReplication, TS.TrainingResultChartUnderReplication); + + rrdset_done(RS); + } } diff --git a/ml/ADCharts.h b/ml/ADCharts.h index 0be324f7d7..ee09669e22 100644 --- a/ml/ADCharts.h +++ b/ml/ADCharts.h @@ -3,20 +3,18 @@ #ifndef ML_ADCHARTS_H #define ML_ADCHARTS_H +#include "Stats.h" #include "ml-private.h" namespace ml { -void updateDimensionsChart(RRDHOST *RH, - collected_number NumTrainedDimensions, - collected_number NumNormalDimensions, - collected_number NumAnomalousDimensions); +void updateDimensionsChart(RRDHOST *RH, const MachineLearningStats &MLS); void updateHostAndDetectionRateCharts(RRDHOST *RH, collected_number AnomalyRate); -void updateDetectionChart(RRDHOST *RH); +void updateResourceUsageCharts(RRDHOST *RH, const struct rusage &PredictionRU, const struct rusage &TrainingRU); -void updateTrainingChart(RRDHOST *RH, struct rusage *TRU); +void updateTrainingStatisticsChart(RRDHOST *RH, const TrainingStats &TS); } // namespace ml diff --git a/ml/Chart.cc b/ml/Chart.cc new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/ml/Chart.cc diff --git a/ml/Chart.h b/ml/Chart.h new file mode 100644 index 0000000000..dbd6a910f9 --- /dev/null +++ b/ml/Chart.h @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ML_CHART_H +#define ML_CHART_H + +#include "Config.h" +#include "Dimension.h" + +#include "ml-private.h" +#include "json/single_include/nlohmann/json.hpp" + +namespace ml +{ + +class Chart { +public: + Chart(RRDSET *RS) : + RS(RS), + MLS() + { } + + RRDSET *getRS() const { + return RS; + } + + bool isAvailableForML() { + return rrdset_is_available_for_exporting_and_alarms(RS); + } + + void addDimension(Dimension *D) { + std::lock_guard<Mutex> L(M); + Dimensions[D->getRD()] = D; + } + + void removeDimension(Dimension *D) { + std::lock_guard<Mutex> L(M); 
+ Dimensions.erase(D->getRD()); + } + + void getModelsAsJson(nlohmann::json &Json) { + std::lock_guard<Mutex> L(M); + + for (auto &DP : Dimensions) { + Dimension *D = DP.second; + nlohmann::json JsonArray = nlohmann::json::array(); + for (const KMeans &KM : D->getModels()) { + nlohmann::json J; + KM.toJson(J); + JsonArray.push_back(J); + } + + Json[getMLDimensionID(D->getRD())] = JsonArray; + } + } + + void updateBegin() { + M.lock(); + MLS = {}; + } + + void updateDimension(Dimension *D, bool IsAnomalous) { + switch (D->getMLS()) { + case MachineLearningStatus::DisabledDueToUniqueUpdateEvery: + MLS.NumMachineLearningStatusDisabledUE++; + return; + case MachineLearningStatus::DisabledDueToExcludedChart: + MLS.NumMachineLearningStatusDisabledSP++; + return; + case MachineLearningStatus::Enabled: { + MLS.NumMachineLearningStatusEnabled++; + + switch (D->getMT()) { + case MetricType::Constant: + MLS.NumMetricTypeConstant++; + MLS.NumTrainingStatusTrained++; + MLS.NumNormalDimensions++; + return; + case MetricType::Variable: + MLS.NumMetricTypeVariable++; + break; + } + + switch (D->getTS()) { + case TrainingStatus::Untrained: + MLS.NumTrainingStatusUntrained++; + return; + case TrainingStatus::PendingWithoutModel: + MLS.NumTrainingStatusPendingWithoutModel++; + return; + case TrainingStatus::Trained: + MLS.NumTrainingStatusTrained++; + + MLS.NumAnomalousDimensions += IsAnomalous; + MLS.NumNormalDimensions += !IsAnomalous; + return; + case TrainingStatus::PendingWithModel: + MLS.NumTrainingStatusPendingWithModel++; + + MLS.NumAnomalousDimensions += IsAnomalous; + MLS.NumNormalDimensions += !IsAnomalous; + return; + } + + return; + } + } + } + + void updateEnd() { + M.unlock(); + } + + MachineLearningStats getMLS() { + std::lock_guard<Mutex> L(M); + return MLS; + } + +private: + RRDSET *RS; + MachineLearningStats MLS; + + Mutex M; + std::unordered_map<RRDDIM *, Dimension *> Dimensions; +}; + +} // namespace ml + +#endif /* ML_CHART_H */ diff --git a/ml/Config.cc 
b/ml/Config.cc index eedd8c29fd..ba3a614452 100644 --- a/ml/Config.cc +++ b/ml/Config.cc @@ -31,7 +31,7 @@ void Config::readMLConfig(void) { unsigned MaxTrainSamples = config_get_number(ConfigSectionML, "maximum num samples to train", 4 * 3600); unsigned MinTrainSamples = config_get_number(ConfigSectionML, "minimum num samples to train", 1 * 900); unsigned TrainEvery = config_get_number(ConfigSectionML, "train every", 1 * 3600); - unsigned NumModelsToUse = config_get_number(ConfigSectionML, "number of models per dimension", 1 * 24); + unsigned NumModelsToUse = config_get_number(ConfigSectionML, "number of models per dimension", 1); unsigned DiffN = config_get_number(ConfigSectionML, "num samples to diff", 1); unsigned SmoothN = config_get_number(ConfigSectionML, "num samples to smooth", 3); @@ -53,7 +53,7 @@ void Config::readMLConfig(void) { MaxTrainSamples = clamp<unsigned>(MaxTrainSamples, 1 * 3600, 24 * 3600); MinTrainSamples = clamp<unsigned>(MinTrainSamples, 1 * 900, 6 * 3600); TrainEvery = clamp<unsigned>(TrainEvery, 1 * 3600, 6 * 3600); - NumModelsToUse = clamp<unsigned>(TrainEvery, 1, 7 * 24); + NumModelsToUse = clamp<unsigned>(NumModelsToUse, 1, 7 * 24); DiffN = clamp(DiffN, 0u, 1u); SmoothN = clamp(SmoothN, 0u, 5u); @@ -108,7 +108,7 @@ void Config::readMLConfig(void) { // Always exclude anomaly_detection charts from training. Cfg.ChartsToSkip = "anomaly_detection.* "; Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*"); - Cfg.SP_ChartsToSkip = simple_pattern_create(ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT); + Cfg.SP_ChartsToSkip = simple_pattern_create(Cfg.ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT); Cfg.StreamADCharts = config_get_boolean(ConfigSectionML, "stream anomaly detection charts", true); } |