summary | refs | log | tree | commit | diff | stats
path: root/daemon
diff options
context:
space:
mode:
author: vkalintiris <vasilis@netdata.cloud> 2023-04-13 20:29:52 +0300
committer: GitHub <noreply@github.com> 2023-04-13 20:29:52 +0300
commit: 003df5f2b76973f898b44742b7e071ff2654343a (patch)
tree: 46183f6a35edb887ab8f3de8a1d6e398438a6a0f /daemon
parent: 40f69dc20f7e40b7155d29a3f735ec4af29f4865 (diff)
Save and load ML models (#14810)
* Revert "Revert "Use static thread-pool for training. (#14702)" (#14782)"
  This reverts commit 5321ca8d1ef8d974a6a2b2128ca8804de6acb693.
* Model I/O.
* Minor changes
  Meant to make debugging crash issues easier on cloud VMs:
  - Less verbose logging
  - Higher logging history
  - Modify installer to use debug info by default
* Fix ML initialization order.
* Read-lock hosts when running detection.
* Revert debugging changes.
* Update ml/Config.cc
  Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
---------
Co-authored-by: Andrew Maguire <andrewm4894@gmail.com>
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/global_statistics.c 28
-rw-r--r-- daemon/main.c 18
-rw-r--r-- daemon/main.h 20
3 files changed, 19 insertions, 47 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index 0dc3ee6452..ee68bebd15 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -827,33 +827,7 @@ static void global_statistics_charts(void) {
rrdset_done(st_points_stored);
}
- {
- static RRDSET *st = NULL;
- static RRDDIM *rd = NULL;
-
- if (unlikely(!st)) {
- st = rrdset_create_localhost(
- "netdata" // type
- , "ml_models_consulted" // id
- , NULL // name
- , NETDATA_ML_CHART_FAMILY // family
- , NULL // context
- , "KMeans models used for prediction" // title
- , "models" // units
- , NETDATA_ML_PLUGIN // plugin
- , NETDATA_ML_MODULE_DETECTION // module
- , NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS // priority
- , localhost->rrd_update_every // update_every
- , RRDSET_TYPE_AREA // chart_type
- );
-
- rd = rrddim_add(st, "num_models_consulted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set_by_pointer(st, rd, (collected_number) gs.ml_models_consulted);
-
- rrdset_done(st);
- }
+ ml_update_global_statistics_charts(gs.ml_models_consulted);
}
// ----------------------------------------------------------------------------
diff --git a/daemon/main.c b/daemon/main.c
index 682106b78e..478b5d002c 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -148,10 +148,6 @@ static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
buffer_strcat(wb, "MAINTENANCE ");
if(service & SERVICE_COLLECTORS)
buffer_strcat(wb, "COLLECTORS ");
- if(service & SERVICE_ML_TRAINING)
- buffer_strcat(wb, "ML_TRAINING ");
- if(service & SERVICE_ML_PREDICTION)
- buffer_strcat(wb, "ML_PREDICTION ");
if(service & SERVICE_REPLICATION)
buffer_strcat(wb, "REPLICATION ");
if(service & ABILITY_DATA_QUERIES)
@@ -340,6 +336,11 @@ void netdata_cleanup_and_exit(int ret) {
}
#endif
+ delta_shutdown_time("disable ML detection and training threads");
+
+ ml_stop_threads();
+ ml_fini();
+
delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
service_signal_exit(
@@ -351,12 +352,11 @@ void netdata_cleanup_and_exit(int ret) {
| SERVICE_ACLKSYNC
);
- delta_shutdown_time("stop replication, exporters, ML training, health and web servers threads");
+ delta_shutdown_time("stop replication, exporters, health and web servers threads");
timeout = !service_wait_exit(
SERVICE_REPLICATION
| SERVICE_EXPORTERS
- | SERVICE_ML_TRAINING
| SERVICE_HEALTH
| SERVICE_WEB_SERVER
, 3 * USEC_PER_SEC);
@@ -368,11 +368,10 @@ void netdata_cleanup_and_exit(int ret) {
| SERVICE_STREAMING
, 3 * USEC_PER_SEC);
- delta_shutdown_time("stop ML prediction and context threads");
+ delta_shutdown_time("stop context thread");
timeout = !service_wait_exit(
- SERVICE_ML_PREDICTION
- | SERVICE_CONTEXT
+ SERVICE_CONTEXT
, 3 * USEC_PER_SEC);
delta_shutdown_time("stop maintenance thread");
@@ -2085,6 +2084,7 @@ int main(int argc, char **argv) {
}
else debug(D_SYSTEM, "Not starting thread %s.", st->name);
}
+ ml_start_threads();
// ------------------------------------------------------------------------
// Initialize netdata agent command serving from cli and signals
diff --git a/daemon/main.h b/daemon/main.h
index 7e659e939a..3e32c5ad6d 100644
--- a/daemon/main.h
+++ b/daemon/main.h
@@ -33,17 +33,15 @@ typedef enum {
ABILITY_STREAMING_CONNECTIONS = (1 << 2),
SERVICE_MAINTENANCE = (1 << 3),
SERVICE_COLLECTORS = (1 << 4),
- SERVICE_ML_TRAINING = (1 << 5),
- SERVICE_ML_PREDICTION = (1 << 6),
- SERVICE_REPLICATION = (1 << 7),
- SERVICE_WEB_SERVER = (1 << 8),
- SERVICE_ACLK = (1 << 9),
- SERVICE_HEALTH = (1 << 10),
- SERVICE_STREAMING = (1 << 11),
- SERVICE_CONTEXT = (1 << 12),
- SERVICE_ANALYTICS = (1 << 13),
- SERVICE_EXPORTERS = (1 << 14),
- SERVICE_ACLKSYNC = (1 << 15)
+ SERVICE_REPLICATION = (1 << 5),
+ SERVICE_WEB_SERVER = (1 << 6),
+ SERVICE_ACLK = (1 << 7),
+ SERVICE_HEALTH = (1 << 8),
+ SERVICE_STREAMING = (1 << 9),
+ SERVICE_CONTEXT = (1 << 10),
+ SERVICE_ANALYTICS = (1 << 11),
+ SERVICE_EXPORTERS = (1 << 12),
+ SERVICE_ACLKSYNC = (1 << 13)
} SERVICE_TYPE;
typedef enum {