summary | refs | log | tree | commit | diff | stats
path: root/ml/Dimension.h
diff options
context:
space:
mode:
author: vkalintiris <vasilis@netdata.cloud> 2022-12-22 13:18:55 +0200
committer: GitHub <noreply@github.com> 2022-12-22 13:18:55 +0200
commit: 6f42311c4b32d42798f78de1fd43f53694f24e6e (patch)
tree: a48e85baea0d2feabdcddf1426a6a3c8c46c5568 /ml/Dimension.h
parent: c1aec98b30d8a4e80813cfccd636c31999c7ae3e (diff)
Revert "Refactor ML code and add support for multiple KMeans models. … (#14172)
Diffstat (limited to 'ml/Dimension.h')
-rw-r--r-- ml/Dimension.h 174
1 files changed, 36 insertions, 138 deletions
diff --git a/ml/Dimension.h b/ml/Dimension.h
index 8666710b6a..3ec56e0981 100644
--- a/ml/Dimension.h
+++ b/ml/Dimension.h
@@ -3,7 +3,6 @@
#ifndef ML_DIMENSION_H
#define ML_DIMENSION_H
-#include "Stats.h"
#include "Query.h"
#include "Config.h"
@@ -11,6 +10,12 @@
namespace ml {
+enum class MLResult { // Outcome code; in this header it is the return type of Dimension::trainModel().
+ Success = 0, // Explicitly zero; the enumerators below implicitly take the next values (1, 2).
+ MissingData, // NOTE(review): presumably "not enough data available to train" - confirm against trainModel()'s definition.
+ NaN, // NOTE(review): presumably signals a not-a-number value met during training - confirm against trainModel()'s definition.
+};
+
static inline std::string getMLDimensionID(RRDDIM *RD) {
RRDSET *RS = RD->rrdset;
@@ -19,118 +24,16 @@ static inline std::string getMLDimensionID(RRDDIM *RD) {
return SS.str();
}
-enum class MachineLearningStatus {
- // Enable training/prediction
- Enabled,
-
- // Disable due to update every being different from the host's
- DisabledDueToUniqueUpdateEvery,
-
- // Disable because configuration pattern matches the chart's id
- DisabledDueToExcludedChart,
-};
-
-enum class TrainingStatus {
- // We don't have a model for this dimension
- Untrained,
-
- // Request for training sent, but we don't have any models yet
- PendingWithoutModel,
-
- // Request to update existing models sent
- PendingWithModel,
-
- // Have a valid, up-to-date model
- Trained,
-};
-
-enum class MetricType {
- // The dimension has constant values, no need to train
- Constant,
-
- // The dimension's values fluctuate, we need to generate a model
- Variable,
-};
-
-struct TrainingRequest {
- // Chart/dimension we want to train
- STRING *ChartId;
- STRING *DimensionId;
-
- // Creation time of request
- time_t RequestTime;
-
- // First/last entry of this dimension in DB
- // at the point the request was made
- time_t FirstEntryOnRequest;
- time_t LastEntryOnRequest;
-};
-
-void dumpTrainingRequest(const TrainingRequest &TrainingReq, const char *Prefix);
-
-enum TrainingResult {
- // We managed to create a KMeans model
- Ok,
- // Could not query DB with a correct time range
- InvalidQueryTimeRange,
- // Did not gather enough data from DB to run KMeans
- NotEnoughCollectedValues,
- // Acquired a null dimension
- NullAcquiredDimension,
- // Chart is under replication
- ChartUnderReplication,
-};
-
-struct TrainingResponse {
- // Time when the request for this response was made
- time_t RequestTime;
-
- // First/last entry of the dimension in DB when generating the request
- time_t FirstEntryOnRequest;
- time_t LastEntryOnRequest;
-
- // First/last entry of the dimension in DB when generating the response
- time_t FirstEntryOnResponse;
- time_t LastEntryOnResponse;
-
- // After/Before timestamps of our DB query
- time_t QueryAfterT;
- time_t QueryBeforeT;
-
- // Actual after/before returned by the DB query ops
- time_t DbAfterT;
- time_t DbBeforeT;
-
- // Number of doubles returned by the DB query
- size_t CollectedValues;
-
- // Number of values we return to the caller
- size_t TotalValues;
-
- // Result of training response
- TrainingResult Result;
-};
-
-void dumpTrainingResponse(const TrainingResponse &TrainingResp, const char *Prefix);
-
class Dimension {
public:
Dimension(RRDDIM *RD) :
RD(RD),
- MT(MetricType::Constant),
- TS(TrainingStatus::Untrained),
- TR(),
- LastTrainingTime(0)
- {
- if (simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(RD->rrdset)))
- MLS = MachineLearningStatus::DisabledDueToExcludedChart;
- else if (RD->update_every != RD->rrdset->rrdhost->rrd_update_every)
- MLS = MachineLearningStatus::DisabledDueToUniqueUpdateEvery;
- else
- MLS = MachineLearningStatus::Enabled;
-
- Models.reserve(Cfg.NumModelsToUse);
- }
+ LastTrainedAt(Seconds(0)),
+ Trained(false),
+ ConstantModel(false),
+ AnomalyScore(0.0),
+ AnomalyBit(0)
+ { }
RRDDIM *getRD() const {
return RD;
@@ -140,54 +43,49 @@ public:
return RD->update_every;
}
- MetricType getMT() const {
- return MT;
+ time_t latestTime() const {
+ return Query(RD).latestTime();
}
- TrainingStatus getTS() const {
- return TS;
+ time_t oldestTime() const {
+ return Query(RD).oldestTime();
}
- MachineLearningStatus getMLS() const {
- return MLS;
+ bool isTrained() const {
+ return Trained;
}
- TrainingResult trainModel(const TrainingRequest &TR);
+ bool isAnomalous() const {
+ return AnomalyBit;
+ }
- void scheduleForTraining(time_t CurrT);
+ bool shouldTrain(const TimePoint &TP) const;
- bool predict(time_t CurrT, CalculatedNumber Value, bool Exists);
+ bool isActive() const;
- std::vector<KMeans> getModels();
-
- void dump() const;
+ MLResult trainModel();
-private:
- TrainingRequest getTrainingRequest(time_t CurrT) const {
- return TrainingRequest {
- string_dup(RD->rrdset->id),
- string_dup(RD->id),
- CurrT,
- rrddim_first_entry_t(RD),
- rrddim_last_entry_t(RD)
- };
- }
+ bool predict(CalculatedNumber Value, bool Exists);
+
+ std::pair<bool, double> detect(size_t WindowLength, bool Reset);
+
+ std::array<KMeans, 1> getModels();
private:
- std::pair<CalculatedNumber *, TrainingResponse> getCalculatedNumbers(const TrainingRequest &TrainingReq);
+ std::pair<CalculatedNumber *, size_t> getCalculatedNumbers();
public:
RRDDIM *RD;
- MetricType MT;
- TrainingStatus TS;
- TrainingResponse TR;
- time_t LastTrainingTime;
+ TimePoint LastTrainedAt;
+ std::atomic<bool> Trained;
+ std::atomic<bool> ConstantModel;
- MachineLearningStatus MLS;
+ CalculatedNumber AnomalyScore;
+ std::atomic<bool> AnomalyBit;
std::vector<CalculatedNumber> CNs;
- std::vector<KMeans> Models;
+ std::array<KMeans, 1> Models;
std::mutex Mutex;
};