summaryrefslogtreecommitdiffstats
path: root/ml/Dimension.h
diff options
context:
space:
mode:
author: vkalintiris <vasilis@netdata.cloud> 2022-02-24 10:57:30 +0200
committer: GitHub <noreply@github.com> 2022-02-24 10:57:30 +0200
commit: 69ea17d6ec534e1ed796a92fd042bd76a3ca9215 (patch)
tree: 4345e3405b2ac1e37a9be1615d6dad799e15327d /ml/Dimension.h
parent: 8756eb80c77caf411f7fae605d3f1bb03dd60b76 (diff)
Track anomaly rates with DBEngine. (#12083)
* Track anomaly rates with DBEngine. This commit adds support for tracking anomaly rates with DBEngine. We do so by creating a single chart with id "anomaly_detection.anomaly_rates" for each trainable/predictable host, which is responsible for tracking the anomaly rate of each dimension that we train/predict for that host. The rrdset->state->is_ar_chart boolean flag is set to true only for anomaly rates charts. We use this flag to: - Disable exposing the anomaly rates charts through the functionality in backends/, exporting/ and streaming/. - Skip generation of configuration options for the name, algorithm, multiplier, divisor of each dimension in an anomaly rates chart. - Skip the creation of health variables for anomaly rates dimensions. - Skip the chart/dim queue of ACLK. - Post-process the RRDR result of an anomaly rates chart, so that we can return a sorted, trimmed number of anomalous dimensions. In a child/parent configuration where both the child and the parent run ML for the child, we want to be able to stream the rest of the ML-related charts to the parent. To be able to do this without any chart name collisions, the charts are now created on localhost and their IDs and titles have the node's machine_guid and hostname as a suffix, respectively. * Fix exporting_engine tests. * Restore default ML configuration. The reverted changes were meant for local testing only. This commit restores the default values that we want to have when someone runs anomaly detection on their node. * Set context for anomaly_detection.* charts. * Check for anomaly rates chart only with a valid pointer. * Remove duplicate code. * Use a more descriptive name for id/title pair variable
Diffstat (limited to 'ml/Dimension.h')
-rw-r--r--ml/Dimension.h36
1 files changed, 29 insertions, 7 deletions
diff --git a/ml/Dimension.h b/ml/Dimension.h
index 903a7540ef..3b399d351a 100644
--- a/ml/Dimension.h
+++ b/ml/Dimension.h
@@ -12,11 +12,7 @@ namespace ml {
class RrdDimension {
public:
- RrdDimension(RRDDIM *RD) : RD(RD), Ops(&RD->state->query_ops) {
- std::stringstream SS;
- SS << RD->rrdset->id << "|" << RD->name;
- ID = SS.str();
- }
+ RrdDimension(RRDDIM *RD) : RD(RD), Ops(&RD->state->query_ops) { }
RRDDIM *getRD() const { return RD; }
@@ -26,12 +22,27 @@ public:
unsigned updateEvery() const { return RD->update_every; }
- const std::string getID() const { return ID; }
+ const std::string getID() const {
+ std::stringstream SS;
+ SS << RD->rrdset->id << "|" << RD->name;
+ return SS.str();
+ }
+
+ void setAnomalyRateRD(RRDDIM *ARRD) { AnomalyRateRD = ARRD; }
+ RRDDIM *getAnomalyRateRD() const { return AnomalyRateRD; }
- virtual ~RrdDimension() {}
+ void setAnomalyRateRDName(const char *Name) const {
+ rrddim_set_name(AnomalyRateRD->rrdset, AnomalyRateRD, Name);
+ }
+
+ virtual ~RrdDimension() {
+ rrddim_free_custom(AnomalyRateRD->rrdset, AnomalyRateRD, 0);
+ }
private:
RRDDIM *RD;
+ RRDDIM *AnomalyRateRD;
+
struct rrddim_volatile::rrddim_query_ops *Ops;
std::string ID;
@@ -94,9 +105,20 @@ public:
bool isAnomalous() { return AnomalyBit; }
+ void updateAnomalyBitCounter(RRDSET *RS, unsigned Elapsed, bool IsAnomalous) {
+ AnomalyBitCounter += IsAnomalous;
+
+ if (Elapsed == Cfg.DBEngineAnomalyRateEvery) {
+ double AR = static_cast<double>(AnomalyBitCounter) / Cfg.DBEngineAnomalyRateEvery;
+ rrddim_set_by_pointer(RS, getAnomalyRateRD(), AR * 1000);
+ AnomalyBitCounter = 0;
+ }
+ }
+
private:
CalculatedNumber AnomalyScore{0.0};
std::atomic<bool> AnomalyBit{false};
+ unsigned AnomalyBitCounter{0};
std::vector<CalculatedNumber> CNs;
};