Remove anomaly detector (#13657)

* Move all dims under one class. * Dimension owns anomaly rate RD. * Remove Dimension::isAnomalous() * Remove Dimension::trainEvery() * Rm ml/kmeans * Remove anomaly detector The same logic can be implemented by using the host anomaly rate dim. * Profile plugin. * Revert "Profile plugin." This reverts commit e3db37cb49c514502c5216cfe7bca2a003fb90f1. * Add separate source files for anomaly detection charts. * Handle training/prediction sync at the dimension level. * Keep multiple KMeans models in mem. * Move feature extraction outside KMeans class. * Use multiple models. * Add /api/v1/ml_models endpoint. * Remove Dimension::getID() * Use just 1 model and fix tests. * Add detection logic based on rrdr. * Remove config options related to anomaly detection. * Make anomaly detection queries configurable. * Fix ad query duration option. * Finalize queries in all code paths. * Check if query was initialized before finalizing it * Do not leak OWA * Profile plugin. * Revert "Profile plugin." This reverts commit 5c77145d0df7e091d030476c480ab8d9cbceb89e. * Change context from anomaly_detection to detector_events.
author: vkalintiris <vasilis@netdata.cloud> 2022-10-05 10:11:12 +0300
committer: GitHub <noreply@github.com> 2022-10-05 10:11:12 +0300
commit: 6850878e697d66dc90b9af1e750b22238c63c292 (patch)
tree: 1e4bf33a393c258ac31bf6971f1ea2b246e1635c /ml/KMeans.cc
parent: 2b7a964d49df6deda32bffbe6141ec53429d68fd (diff)
1 files changed, 43 insertions, 0 deletions
diff --git a/ml/KMeans.cc b/ml/KMeans.cc
new file mode 100644
index 0000000000..edc2ef49ec
--- /dev/null
+++ b/ml/KMeans.cc
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "KMeans.h"
+#include <dlib/clustering.h>
+
+// Fit the cluster centers to Samples, then record the smallest and largest mean
+// sample-to-center distance over the training set; anomalyScore() reads both.
+void KMeans::train(const std::vector<DSample> &Samples, size_t MaxIterations) {
+    MinDist = std::numeric_limits<CalculatedNumber>::max();
+    // lowest(), not min(): for floating-point types min() is the smallest positive
+    // value, so a training set whose mean distances are all 0 never raised MaxDist.
+    MaxDist = std::numeric_limits<CalculatedNumber>::lowest();
+
+    ClusterCenters.clear();
+    dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples);
+    dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations);
+
+    for (const auto &S : Samples) {
+        CalculatedNumber MeanDist = 0.0;
+        for (const auto &KMCenter : ClusterCenters)
+            MeanDist += dlib::length(KMCenter - S);
+        MeanDist /= NumClusters;
+
+        if (MeanDist < MinDist)
+            MinDist = MeanDist;
+        if (MeanDist > MaxDist)
+            MaxDist = MeanDist;
+    }
+}
+
+// Score Sample by where its mean distance to the cluster centers falls relative
+// to [MinDist, MaxDist], capped at 100; returns 0 when MaxDist equals MinDist.
+CalculatedNumber KMeans::anomalyScore(const DSample &Sample) const {
+    CalculatedNumber DistSum = 0.0;
+    for (const auto &Center : ClusterCenters)
+        DistSum += dlib::length(Center - Sample);
+    const CalculatedNumber AvgDist = DistSum / NumClusters;
+    if (MaxDist == MinDist)
+        return 0.0;
+
+    const CalculatedNumber Score = 100.0 * std::abs((AvgDist - MinDist) / (MaxDist - MinDist));
+    return (Score > 100.0) ? 100.0 : Score;
+}
author	vkalintiris <vasilis@netdata.cloud>	2022-10-05 10:11:12 +0300
committer	GitHub <noreply@github.com>	2022-10-05 10:11:12 +0300
commit	6850878e697d66dc90b9af1e750b22238c63c292 (patch)
tree	1e4bf33a393c258ac31bf6971f1ea2b246e1635c /ml/KMeans.cc
parent	2b7a964d49df6deda32bffbe6141ec53429d68fd (diff)