summary | refs | log | tree | commit | diff | stats
path: root/ml/Config.cc
diff options
context:
space:
mode:
author vkalintiris <vasilis@netdata.cloud> 2022-03-30 13:38:18 +0300
committer GitHub <noreply@github.com> 2022-03-30 13:38:18 +0300
commit 41a40dc3a406c3c8dc70f41038e7d75ef2601f8b (patch)
tree d889c164f2be7588a4c5940cf777ccd94cb39e7a /ml/Config.cc
parent ffee2317885bf8ceab7224ba23aad08421986cd5 (diff)
ML-related changes to address issue/discussion comments. (#12494)
* Increase training thread's max sleep time. With this change we will only cap the allotted time when it is more than ten seconds. The previous limit was one second, which had the effect of scheduling dimensions near the beginning of each training window. This was not desirable because it would cause high CPU usage on parents with many children.
* Only exclude netdata.* charts from training.
* Use heartbeat in detection thread.
* Track rusage of prediction thread.
* Track rusage of training thread.
* Add support for random sampling of extracted features.
* Rebase
* Skip RNG when ML is disabled and fix undefined behaviour
Diffstat (limited to 'ml/Config.cc')
-rw-r--r-- ml/Config.cc 8
1 files changed, 4 insertions, 4 deletions
diff --git a/ml/Config.cc b/ml/Config.cc
index f0c9a6af16..7a86aa7bdb 100644
--- a/ml/Config.cc
+++ b/ml/Config.cc
@@ -38,6 +38,7 @@ void Config::readMLConfig(void) {
unsigned SmoothN = config_get_number(ConfigSectionML, "num samples to smooth", 3);
unsigned LagN = config_get_number(ConfigSectionML, "num samples to lag", 5);
+ double RandomSamplingRatio = config_get_float(ConfigSectionML, "random sampling ratio", 1.0 / LagN);
unsigned MaxKMeansIters = config_get_number(ConfigSectionML, "maximum number of k-means iterations", 1000);
double DimensionAnomalyScoreThreshold = config_get_float(ConfigSectionML, "dimension anomaly score threshold", 0.99);
@@ -67,6 +68,7 @@ void Config::readMLConfig(void) {
SmoothN = clamp(SmoothN, 0u, 5u);
LagN = clamp(LagN, 0u, 5u);
+ RandomSamplingRatio = clamp(RandomSamplingRatio, 0.2, 1.0);
MaxKMeansIters = clamp(MaxKMeansIters, 500u, 1000u);
DimensionAnomalyScoreThreshold = clamp(DimensionAnomalyScoreThreshold, 0.01, 5.00);
@@ -112,6 +114,7 @@ void Config::readMLConfig(void) {
Cfg.SmoothN = SmoothN;
Cfg.LagN = LagN;
+ Cfg.RandomSamplingRatio = RandomSamplingRatio;
Cfg.MaxKMeansIters = MaxKMeansIters;
Cfg.DimensionAnomalyScoreThreshold = DimensionAnomalyScoreThreshold;
@@ -128,9 +131,6 @@ void Config::readMLConfig(void) {
// Always exclude anomaly_detection charts from training.
Cfg.ChartsToSkip = "anomaly_detection.* ";
- Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training",
- "!system.* !cpu.* !mem.* !disk.* !disk_* "
- "!ip.* !ipv4.* !ipv6.* !net.* !net_* !netfilter.* "
- "!services.* !apps.* !groups.* !user.* !ebpf.* !netdata.* *");
+ Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*");
Cfg.SP_ChartsToSkip = simple_pattern_create(ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);
}