summaryrefslogtreecommitdiffstats
path: root/ml/Dimension.cc
diff options
context:
space:
mode:
authorvkalintiris <vasilis@netdata.cloud>2022-03-30 13:38:18 +0300
committerGitHub <noreply@github.com>2022-03-30 13:38:18 +0300
commit41a40dc3a406c3c8dc70f41038e7d75ef2601f8b (patch)
treed889c164f2be7588a4c5940cf777ccd94cb39e7a /ml/Dimension.cc
parentffee2317885bf8ceab7224ba23aad08421986cd5 (diff)
ML-related changes to address issue/discussion comments. (#12494)
* Increase training thread's max sleep time. With this change we will only cap the allotted time when it is more than ten seconds. The previous limit was one second, which had the effect of scheduling dimensions near the beginning of each training window. This was not desirable because it would cause high CPU usage on parents with many children.
* Only exclude netdata.* charts from training.
* Use heartbeat in detection thread.
* Track rusage of prediction thread.
* Track rusage of training thread.
* Add support for random sampling of extracted features.
* Rebase
* Skip RNG when ML is disabled and fix undef behaviour
Diffstat (limited to 'ml/Dimension.cc')
-rw-r--r--ml/Dimension.cc10
1 files changed, 8 insertions, 2 deletions
diff --git a/ml/Dimension.cc b/ml/Dimension.cc
index 11d1baa8fd..290d4c7439 100644
--- a/ml/Dimension.cc
+++ b/ml/Dimension.cc
@@ -125,8 +125,13 @@ MLResult TrainableDimension::trainModel() {
if (!CNs)
return MLResult::MissingData;
- SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN);
+ unsigned TargetNumSamples = Cfg.MaxTrainSamples * Cfg.RandomSamplingRatio;
+ double SamplingRatio = std::min(static_cast<double>(TargetNumSamples) / N, 1.0);
+
+ SamplesBuffer SB = SamplesBuffer(CNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
+ SamplingRatio, Cfg.RandomNums);
KM.train(SB, Cfg.MaxKMeansIters);
+
Trained = true;
ConstantModel = true;
@@ -162,7 +167,8 @@ std::pair<MLResult, bool> PredictableDimension::predict() {
CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));
- SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN);
+ SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN,
+ 1.0, Cfg.RandomNums);
AnomalyScore = computeAnomalyScore(SB);
delete[] TmpCNs;