summaryrefslogtreecommitdiffstats
path: root/ml/KMeans.cc
diff options
context:
space:
mode:
authorvkalintiris <vasilis@netdata.cloud>2022-10-05 10:11:12 +0300
committerGitHub <noreply@github.com>2022-10-05 10:11:12 +0300
commit6850878e697d66dc90b9af1e750b22238c63c292 (patch)
tree1e4bf33a393c258ac31bf6971f1ea2b246e1635c /ml/KMeans.cc
parent2b7a964d49df6deda32bffbe6141ec53429d68fd (diff)
Remove anomaly detector (#13657)
* Move all dims under one class. * Dimension owns anomaly rate RD. * Remove Dimension::isAnomalous() * Remove Dimension::trainEvery() * Rm ml/kmeans * Remove anomaly detector The same logic can be implemented by using the host anomaly rate dim. * Profile plugin. * Revert "Profile plugin." This reverts commit e3db37cb49c514502c5216cfe7bca2a003fb90f1. * Add separate source files for anomaly detection charts. * Handle training/prediction sync at the dimension level. * Keep multiple KMeans models in mem. * Move feature extraction outside KMeans class. * Use multiple models. * Add /api/v1/ml_models endpoint. * Remove Dimension::getID() * Use just 1 model and fix tests. * Add detection logic based on rrdr. * Remove config options related to anomaly detection. * Make anomaly detection queries configurable. * Fix ad query duration option. * Finalize queries in all code paths. * Check if query was initialized before finalizing it * Do not leak OWA * Profile plugin. * Revert "Profile plugin." This reverts commit 5c77145d0df7e091d030476c480ab8d9cbceb89e. * Change context from anomaly_detection to detector_events.
Diffstat (limited to 'ml/KMeans.cc')
-rw-r--r--ml/KMeans.cc43
1 files changed, 43 insertions, 0 deletions
diff --git a/ml/KMeans.cc b/ml/KMeans.cc
new file mode 100644
index 0000000000..edc2ef49ec
--- /dev/null
+++ b/ml/KMeans.cc
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "KMeans.h"
+#include <dlib/clustering.h>
+
+void KMeans::train(const std::vector<DSample> &Samples, size_t MaxIterations) {
+ MinDist = std::numeric_limits<CalculatedNumber>::max();
+ MaxDist = std::numeric_limits<CalculatedNumber>::min();
+
+ ClusterCenters.clear();
+
+ dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples);
+ dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations);
+
+ for (const auto &S : Samples) {
+ CalculatedNumber MeanDist = 0.0;
+
+ for (const auto &KMCenter : ClusterCenters)
+ MeanDist += dlib::length(KMCenter - S);
+
+ MeanDist /= NumClusters;
+
+ if (MeanDist < MinDist)
+ MinDist = MeanDist;
+
+ if (MeanDist > MaxDist)
+ MaxDist = MeanDist;
+ }
+}
+
+CalculatedNumber KMeans::anomalyScore(const DSample &Sample) const {
+ CalculatedNumber MeanDist = 0.0;
+ for (const auto &CC: ClusterCenters)
+ MeanDist += dlib::length(CC - Sample);
+
+ MeanDist /= NumClusters;
+
+ if (MaxDist == MinDist)
+ return 0.0;
+
+ CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist));
+ return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore;
+}