summaryrefslogtreecommitdiffstats
path: root/ml
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2022-09-05 19:31:06 +0300
committerGitHub <noreply@github.com>2022-09-05 19:31:06 +0300
commit5e1b95cf92168c4df74586fb4430dc284806da82 (patch)
treef42077d8b02eaf316683453a7474bd1f599a833d /ml
parent544aef1fde6e79ac57d2dea85d3f063076d7f885 (diff)
Deduplicate all netdata strings (#13570)
* rrdfamily * rrddim * rrdset plugin and module names * rrdset units * rrdset type * rrdset family * rrdset title * rrdset title more * rrdset context * rrdcalctemplate context and removal of context hash from rrdset * strings statistics * rrdset name * rearranged members of rrdset * eliminate rrdset name hash; rrdcalc chart converted to STRING * rrdset id, eliminated rrdset hash * rrdcalc, alarm_entry, alert_config and some of rrdcalctemplate * rrdcalctemplate * rrdvar * eval_variable * rrddimvar and rrdsetvar * rrdhost hostname, os and tags * fix master commits * added thread cache; implemented string_dup without locks * faster thread cache * rrdset and rrddim now use dictionaries for indexing * rrdhost now uses dictionary * rrdfamily now uses DICTIONARY * rrdvar using dictionary instead of AVL * allocate the right size to rrdvar flag members * rrdhost remaining char * members to STRING * * better error handling on indexing * strings now use a read/write lock to allow parallel searches to the index * removed AVL support from dictionaries; implemented STRING with native Judy calls * string releases should be negative * only 31 bits are allowed for enum flags * proper locking on strings * string threading unittest and fixes * fix lgtm finding * fixed naming * stream chart/dimension definitions at the beginning of a streaming session * thread stack variable is undefined on thread cancel * rrdcontext garbage collect per host on startup * worker control in garbage collection * relaxed deletion of rrdmetrics * type checking on dictfe * netdata chart to monitor rrdcontext triggers * Group chart label updates * rrdcontext better handling of collected rrdsets * rrdpush incremental transmission of definitions should use as much buffer as possible * require 1MB per chart * empty the sender buffer before enabling metrics streaming * fill up to 50% of buffer * reset signaling metrics sending * use the shared variable for status * use separate host flag for enabling streaming 
of metrics * make sure the flag is clear * add logging for streaming * add logging for streaming on buffer overflow * circular_buffer proper sizing * removed obsolete logs * do not execute worker jobs if not necessary * better messages about compression disabling * proper use of flags and updating rrdset last access time every time the obsoletion flag is flipped * monitor stream sender used buffer ratio * Update exporting unit tests * no need to compare label value with strcmp * streaming send workers now monitor bandwidth * workers now use strings * streaming receiver monitors incoming bandwidth * parser shift of worker ids * minor fixes * Group chart label updates * Populate context with dimensions that have data * Fix chart id * better shift of parser worker ids * fix for streaming compression * properly count received bytes * ensure LZ4 compression ring buffer does not wrap prematurely * do not stream empty charts; do not process empty instances in rrdcontext * need_to_send_chart_definition() does not need an rrdset lock any more * rrdcontext objects are collected, after data have been written to the db * better logging of RRDCONTEXT transitions * always set all variables needed by the worker utilization charts * implemented double linked list for most objects; eliminated alarm indexes from rrdhost; and many more fixes * lockless strings design - string_dup() and string_freez() are totally lockless when they don't need to touch Judy - only Judy is protected with a read/write lock * STRING code re-organization for clarity * thread_cache improvements; double numbers precision on worker threads * STRING_ENTRY now shadows STRING, so no duplicate definition is required; string_length() renamed to string_strlen() to follow the paradigm of all other functions, STRING internal statistics are now only compiled with NETDATA_INTERNAL_CHECKS * rrdhost index by hostname now cleans up; aclk queries of archived hosts do not index hosts * Add index to speed up database context 
searches * Removed last_updated optimization (was also buggy after latest merge with master) Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Vladimir Kobal <vlad@prokk.net>
Diffstat (limited to 'ml')
-rw-r--r--ml/Dimension.h2
-rw-r--r--ml/Host.cc12
-rw-r--r--ml/Host.h4
-rw-r--r--ml/ml.cc4
4 files changed, 10 insertions, 12 deletions
diff --git a/ml/Dimension.h b/ml/Dimension.h
index 4fbc09b981..1cc053df3f 100644
--- a/ml/Dimension.h
+++ b/ml/Dimension.h
@@ -26,7 +26,7 @@ public:
RRDSET *RS = RD->rrdset;
std::stringstream SS;
- SS << RS->context << "|" << RS->id << "|" << RD->name;
+ SS << rrdset_context(RS) << "|" << rrdset_id(RS) << "|" << rrddim_name(RD);
return SS.str();
}
diff --git a/ml/Host.cc b/ml/Host.cc
index f8cba9d64e..a6f9b330a7 100644
--- a/ml/Host.cc
+++ b/ml/Host.cc
@@ -23,7 +23,7 @@ static void updateDimensionsChart(RRDHOST *RH,
std::stringstream IdSS, NameSS;
IdSS << "dimensions_on_" << localhost->machine_guid;
- NameSS << "dimensions_on_" << localhost->hostname;
+ NameSS << "dimensions_on_" << rrdhost_hostname(localhost);
RS = rrdset_create(
RH,
@@ -69,7 +69,7 @@ static void updateRateChart(RRDHOST *RH, collected_number AnomalyRate) {
std::stringstream IdSS, NameSS;
IdSS << "anomaly_rate_on_" << localhost->machine_guid;
- NameSS << "anomaly_rate_on_" << localhost->hostname;
+ NameSS << "anomaly_rate_on_" << rrdhost_hostname(localhost);
RS = rrdset_create(
RH,
@@ -106,7 +106,7 @@ static void updateWindowLengthChart(RRDHOST *RH, collected_number WindowLength)
std::stringstream IdSS, NameSS;
IdSS << "detector_window_on_" << localhost->machine_guid;
- NameSS << "detector_window_on_" << localhost->hostname;
+ NameSS << "detector_window_on_" << rrdhost_hostname(localhost);
RS = rrdset_create(
RH,
@@ -147,7 +147,7 @@ static void updateEventsChart(RRDHOST *RH,
std::stringstream IdSS, NameSS;
IdSS << "detector_events_on_" << localhost->machine_guid;
- NameSS << "detector_events_on_" << localhost->hostname;
+ NameSS << "detector_events_on_" << rrdhost_hostname(localhost);
RS = rrdset_create(
RH,
@@ -193,7 +193,7 @@ static void updateDetectionChart(RRDHOST *RH) {
std::stringstream IdSS, NameSS;
IdSS << "prediction_stats_" << RH->machine_guid;
- NameSS << "prediction_stats_for_" << RH->hostname;
+ NameSS << "prediction_stats_for_" << rrdhost_hostname(RH);
RS = rrdset_create_localhost(
"netdata", // type
@@ -233,7 +233,7 @@ static void updateTrainingChart(RRDHOST *RH, struct rusage *TRU)
std::stringstream IdSS, NameSS;
IdSS << "training_stats_" << RH->machine_guid;
- NameSS << "training_stats_for_" << RH->hostname;
+ NameSS << "training_stats_for_" << rrdhost_hostname(RH);
RS = rrdset_create_localhost(
"netdata", // type
diff --git a/ml/Host.h b/ml/Host.h
index 2715008f09..5fd8318fd1 100644
--- a/ml/Host.h
+++ b/ml/Host.h
@@ -31,9 +31,7 @@ public:
RRDSET_TYPE_LINE
);
- AnomalyRateRS->flags = static_cast<RRDSET_FLAGS>(
- static_cast<int>(AnomalyRateRS->flags) | RRDSET_FLAG_HIDDEN
- );
+ rrdset_flag_set(AnomalyRateRS, RRDSET_FLAG_HIDDEN);
}
RRDHOST *getRH() { return RH; }
diff --git a/ml/ml.cc b/ml/ml.cc
index 7275d88b87..8c7f56dd73 100644
--- a/ml/ml.cc
+++ b/ml/ml.cc
@@ -16,7 +16,7 @@ bool ml_enabled(RRDHOST *RH) {
if (!Cfg.EnableAnomalyDetection)
return false;
- if (simple_pattern_matches(Cfg.SP_HostsToSkip, RH->hostname))
+ if (simple_pattern_matches(Cfg.SP_HostsToSkip, rrdhost_hostname(RH)))
return false;
return true;
@@ -76,7 +76,7 @@ void ml_new_dimension(RRDDIM *RD) {
if (static_cast<unsigned>(RD->update_every) != H->updateEvery())
return;
- if (simple_pattern_matches(Cfg.SP_ChartsToSkip, RS->name))
+ if (simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(RS)))
return;
Dimension *D = new Dimension(RD);