summary | refs | log | tree | commit | diff | stats
path: root/database
diff options
context:
space:
mode:
author: Costa Tsaousis <costa@netdata.cloud> 2022-11-20 23:47:53 +0200
committer: GitHub <noreply@github.com> 2022-11-20 23:47:53 +0200
commit: 284f6f3aa4f36cefad2601c490510621496c2b53 (patch)
tree: 97a7d55627ef7477f431c53a20d0e6f1f738a419 /database
parent: 2d02484954f68bf7e3015cb649e2f10a9f3c5c95 (diff)
streaming compression, query planner and replication fixes (#14023)
* streaming compression, query planner and replication fixes
* remove journal v2 stats from global statistics
* disable sql for checking past sql UUIDs
* single threaded replication
* final replication thread using dictionaries and JudyL for sorting the pending requests
* do not timeout the sending socket when there are pending replication requests
* streaming receiver using read() instead of fread()
* remove FILE * from streaming - now using posix read() and write()
* increase timeouts to 10 minutes
* apply sender timeout only when there are metrics that are supposed to be streamed
* error handling in replication
* remove retries on socket read timeout; better error messages
* take into account inbound traffic too to detect that a connection is stale
* remove race conditions from replication thread
* make sure deleted entries are marked as executed, so that even if deletion fails, they will not be executed
* 2 minutes timeout to retry streaming to a parent that already has this node
* remove unnecessary condition check
* fix compilation warnings
* include judy in replication
* wrappers to handle retries for SSL_read and SSL_write
* compressed bytes read monitoring
* recursive locks on replication to make it faster during flush or cleanup
* replication completion chart at the receiver side
* simplified recursive mutex
* simplified recursive mutex again
Diffstat (limited to 'database')
-rw-r--r-- database/rrd.h 15
-rw-r--r-- database/rrdcalc.c 22
-rw-r--r-- database/rrdcontext.c 203
-rw-r--r-- database/rrddim.c 17
-rw-r--r-- database/rrdhost.c 64
-rw-r--r-- database/rrdset.c 15
-rw-r--r-- database/sqlite/sqlite_functions.c 109
-rw-r--r-- database/sqlite/sqlite_functions.h 5
8 files changed, 327 insertions, 123 deletions
diff --git a/database/rrd.h b/database/rrd.h
index 0069715b90..b548aa7169 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -55,6 +55,7 @@ struct pg_cache_page_index;
#include "sqlite/sqlite_health.h"
#include "rrdcontext.h"
+extern bool unittest_running;
extern bool dbengine_enabled;
extern size_t storage_tiers;
extern size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS];
@@ -533,8 +534,9 @@ typedef enum rrdset_flags {
RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 22), // the sending side has completed replication
RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 23), // the receiving side has completed replication
+ RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 24), // the receiving side has replication in progress
- RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 24), // a custom variable has been updated and needs to be exposed to parent
+ RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 25), // a custom variable has been updated and needs to be exposed to parent
} RRDSET_FLAGS;
#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag))
@@ -658,6 +660,14 @@ struct rrdset {
netdata_rwlock_t rwlock; // protection for RRDCALC *base
RRDCALC *base; // double linked list of RRDCALC related to this RRDSET
} alerts;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ struct {
+ bool start_streaming;
+ time_t after;
+ time_t before;
+ } replay;
+#endif
};
#define rrdset_plugin_name(st) string2str((st)->plugin_name)
@@ -757,6 +767,8 @@ typedef enum {
// Configuration options
RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS = (1 << 3), // delete files of obsolete charts
RRDHOST_OPTION_DELETE_ORPHAN_HOST = (1 << 4), // delete the entire host when orphan
+
+ RRDHOST_OPTION_REPLICATION = (1 << 5), // when set, we support replication for this host
} RRDHOST_OPTIONS;
#define rrdhost_option_check(host, flag) ((host)->options & (flag))
@@ -937,7 +949,6 @@ struct rrdhost {
struct rrdpush_destinations *destination; // the current destination from the above list
SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent
- bool rrdpush_enable_replication; // enable replication
time_t rrdpush_seconds_to_replicate; // max time we want to replicate from the child
time_t rrdpush_replication_step; // seconds per replication step
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index 41c62e4f6e..aad945a907 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -408,6 +408,8 @@ struct rrdcalc_constructor {
RRDCALC_REACT_NONE,
RRDCALC_REACT_NEW,
} react_action;
+
+ bool existing_from_template;
};
static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
@@ -543,6 +545,20 @@ static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_
ctr->react_action = RRDCALC_REACT_NEW;
}
+static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) { // conflict callback registered on host->rrdcalc_root_index; presumably invoked when the key being set already exists (confirm against the dictionary API). Reports back through constructor_data what kind of alert holds the key.
+ RRDCALC *rc = rrdcalc; // presumably the entry already stored under this key (as opposed to rrdcalc_new, which is unused here)
+ struct rrdcalc_constructor *ctr = constructor_data; // the struct rrdcalc_constructor the caller passed to dictionary_set_advanced()
+
+ if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) // record whether this alert carries the FROM_TEMPLATE run flag
+ ctr->existing_from_template = true;
+ else
+ ctr->existing_from_template = false; // rrdcalc_add_from_rrdcalctemplate() logs its "manually configured" error only in this case
+
+ ctr->react_action = RRDCALC_REACT_NONE; // anything other than RRDCALC_REACT_NEW tells the caller that no new alert was added
+
+ return false; // NOTE(review): presumably signals that the existing item was left unmodified; confirm against the dictionary conflict-callback contract
+}
+
static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) {
RRDCALC *rc = rrdcalc;
struct rrdcalc_constructor *ctr = constructor_data;
@@ -612,6 +628,7 @@ void rrdcalc_rrdhost_index_init(RRDHOST *host) {
host->rrdcalc_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE);
dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL);
+ dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL);
dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL);
dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host);
}
@@ -635,11 +652,12 @@ void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET
.overwrite_alert_name = overwrite_alert_name,
.overwrite_dimensions = overwrite_dimensions,
.react_action = RRDCALC_REACT_NONE,
+ .existing_from_template = false,
};
dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDCALC), &tmp);
- if(tmp.react_action != RRDCALC_REACT_NEW)
- error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It already exists.",
+ if(tmp.react_action != RRDCALC_REACT_NEW && tmp.existing_from_template == false)
+ error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.",
string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host));
}
diff --git a/database/rrdcontext.c b/database/rrdcontext.c
index 4e3751e92b..cfa8af3e04 100644
--- a/database/rrdcontext.c
+++ b/database/rrdcontext.c
@@ -38,7 +38,7 @@ typedef enum {
RRD_FLAG_OWN_LABELS = (1 << 4), // this instance has its own labels - not linked to an RRDSET
RRD_FLAG_LIVE_RETENTION = (1 << 5), // we have got live retention from the database
RRD_FLAG_QUEUED_FOR_HUB = (1 << 6), // this context is currently queued to be dispatched to hub
- RRD_FLAG_QUEUED_FOR_POST_PROCESSING = (1 << 7), // this context is currently queued to be post-processed
+ RRD_FLAG_QUEUED_FOR_PP = (1 << 7), // this context is currently queued to be post-processed
RRD_FLAG_HIDDEN = (1 << 8), // don't expose this to the hub or the API
RRD_FLAG_UPDATE_REASON_TRIGGERED = (1 << 9), // the update was triggered by the child object
@@ -46,24 +46,18 @@ typedef enum {
RRD_FLAG_UPDATE_REASON_NEW_OBJECT = (1 << 11), // this object has just been created
RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT = (1 << 12), // we received an update on this object
RRD_FLAG_UPDATE_REASON_CHANGED_LINKING = (1 << 13), // an instance or a metric switched RRDSET or RRDDIM
- RRD_FLAG_UPDATE_REASON_CHANGED_UUID = (1 << 14), // an instance or a metric changed UUID
- RRD_FLAG_UPDATE_REASON_CHANGED_NAME = (1 << 15), // an instance or a metric changed name
- RRD_FLAG_UPDATE_REASON_CHANGED_UNITS = (1 << 16), // this context or instance changed units
- RRD_FLAG_UPDATE_REASON_CHANGED_TITLE = (1 << 17), // this context or instance changed title
- RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY = (1 << 18), // the context or the instance changed family
- RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE = (1 << 19), // this context or instance changed chart type
- RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY = (1 << 20), // this context or instance changed its priority
- RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY = (1 << 21), // the instance or the metric changed update frequency
- RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 22), // this object has not retention
- RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 23), // this object changed its oldest time in the db
- RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 24), // this object change its latest time in the db
- RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 25), // this object has stopped being collected
- RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 26), // this object has started being collected
- RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 27), // this context belongs to a host that just disconnected
- RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 28), // this context changed because of a db rotation
- RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 29), // this context is not used anymore
- RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS = (1 << 30), // this context is not used anymore
- RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION = (1 << 31), // this object has updated retention
+ RRD_FLAG_UPDATE_REASON_CHANGED_METADATA = (1 << 14), // this context or instance changed uuid, name, units, title, family, chart type, priority, update every, rrd changed flags
+ RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 15), // this object has no retention
+ RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 16), // this object changed its oldest time in the db
+ RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 17), // this object change its latest time in the db
+ RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 18), // this object has stopped being collected
+ RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 19), // this object has started being collected
+ RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 20), // this context belongs to a host that just disconnected
+ RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 21), // this context is not used anymore
+ RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 22), // this context changed because of a db rotation
+
+ // action to perform on an object
+ RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION = (1 << 30), // this object has to update its retention from the db
} RRD_FLAGS;
#define RRD_FLAG_ALL_UPDATE_REASONS ( \
@@ -72,14 +66,7 @@ typedef enum {
|RRD_FLAG_UPDATE_REASON_NEW_OBJECT \
|RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \
|RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \
- |RRD_FLAG_UPDATE_REASON_CHANGED_UUID \
- |RRD_FLAG_UPDATE_REASON_CHANGED_NAME \
- |RRD_FLAG_UPDATE_REASON_CHANGED_UNITS \
- |RRD_FLAG_UPDATE_REASON_CHANGED_TITLE \
- |RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY \
- |RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE \
- |RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY \
- |RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY \
+ |RRD_FLAG_UPDATE_REASON_CHANGED_METADATA \
|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \
|RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T \
|RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T \
@@ -88,7 +75,6 @@ typedef enum {
|RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \
|RRD_FLAG_UPDATE_REASON_DB_ROTATION \
|RRD_FLAG_UPDATE_REASON_UNUSED \
- |RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS \
)
#define RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS ( \
@@ -105,7 +91,7 @@ typedef enum {
#define RRD_FLAGS_PREVENTING_DELETIONS ( \
RRD_FLAG_QUEUED_FOR_HUB \
|RRD_FLAG_COLLECTED \
- |RRD_FLAG_QUEUED_FOR_POST_PROCESSING \
+ |RRD_FLAG_QUEUED_FOR_PP \
)
// get all the flags of an object
@@ -203,34 +189,26 @@ static struct rrdcontext_reason {
usec_t delay_ut;
} rrdcontext_reasons[] = {
// context related
- { RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_TITLE, "changed title", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_UNITS, "changed units", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY, "changed family", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY, "changed priority", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE, "changed chart type", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_CHANGED_METADATA, "changed metadata", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC },
// not context related
- { RRD_FLAG_UPDATE_REASON_CHANGED_UUID, "changed uuid", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY, "changed updated every",65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_NAME, "changed name", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS, "changed flags", 65 * USEC_PER_SEC },
- { RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION, "updated retention", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC },
+ {RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION, "updated retention", 65 * USEC_PER_SEC },
// terminator
- { 0, NULL, 0 },
+ {0, NULL, 0 },
};
@@ -320,7 +298,7 @@ typedef struct rrdcontext {
// ----------------------------------------------------------------------------
// helper one-liners for RRDMETRIC
-static void rrdmetric_update_retention(RRDMETRIC *rm);
+static bool rrdmetric_update_retention(RRDMETRIC *rm);
static inline RRDMETRIC *rrdmetric_acquired_value(RRDMETRIC_ACQUIRED *rma) {
return dictionary_acquired_item_value((DICTIONARY_ITEM *)rma);
@@ -472,7 +450,7 @@ static void rrd_flags_to_buffer(RRD_FLAGS flags, BUFFER *wb) {
if(flags & RRD_FLAG_HIDDEN)
buffer_strcat(wb, "HIDDEN ");
- if(flags & RRD_FLAG_QUEUED_FOR_POST_PROCESSING)
+ if(flags & RRD_FLAG_QUEUED_FOR_PP)
buffer_strcat(wb, "PENDING_UPDATES ");
}
@@ -538,12 +516,39 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus
string2str(rm->id), string2str(rm_new->id));
if(uuid_compare(rm->uuid, rm_new->uuid) != 0) {
+#ifdef NETDATA_INTERNAL_CHECKS
char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN];
uuid_unparse(rm->uuid, uuid1);
uuid_unparse(rm_new->uuid, uuid2);
- internal_error(true, "RRDMETRIC: '%s' of instance '%s' changed uuid from '%s' to '%s'", string2str(rm->id), string2str(rm->ri->id), uuid1, uuid2);
+
+ time_t old_first_time_t = 0;
+ time_t old_last_time_t = 0;
+ if(rrdmetric_update_retention(rm)) {
+ old_first_time_t = rm->first_time_t;
+ old_last_time_t = rm->last_time_t;
+ }
+
+ uuid_copy(rm->uuid, rm_new->uuid);
+
+ time_t new_first_time_t = 0;
+ time_t new_last_time_t = 0;
+ if(rrdmetric_update_retention(rm)) {
+ new_first_time_t = rm->first_time_t;
+ new_last_time_t = rm->last_time_t;
+ }
+
+ internal_error(true,
+ "RRDMETRIC: '%s' of instance '%s' of host '%s' changed UUID from '%s' (retention %ld to %ld, %ld secs) to '%s' (retention %ld to %ld, %ld secs)"
+ , string2str(rm->id)
+ , string2str(rm->ri->id)
+ , rrdhost_hostname(rm->ri->rc->rrdhost)
+ , uuid1, old_first_time_t, old_last_time_t, old_last_time_t - old_first_time_t
+ , uuid2, new_first_time_t, new_last_time_t, new_last_time_t - new_first_time_t
+ );
+#else
uuid_copy(rm->uuid, rm_new->uuid);
- rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_UUID);
+#endif
+ rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(rm->rrddim && rm_new->rrddim && rm->rrddim != rm_new->rrddim) {
@@ -551,12 +556,14 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus
rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING);
}
+#ifdef NETDATA_INTERNAL_CHECKS
if(rm->rrddim && uuid_compare(rm->uuid, rm->rrddim->metric_uuid) != 0) {
char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN];
uuid_unparse(rm->uuid, uuid1);
uuid_unparse(rm_new->uuid, uuid2);
internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rrddim_id(rm->rrddim), uuid1, uuid2);
}
+#endif
if(rm->rrddim != rm_new->rrddim)
rm->rrddim = rm_new->rrddim;
@@ -565,7 +572,7 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus
STRING *old = rm->name;
rm->name = string_dup(rm_new->name);
string_freez(old);
- rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_NAME);
+ rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(!rm->first_time_t || (rm_new->first_time_t && rm_new->first_time_t < rm->first_time_t)) {
@@ -800,8 +807,16 @@ static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_un
string2str(ri->id), string2str(ri_new->id));
if(uuid_compare(ri->uuid, ri_new->uuid) != 0) {
+#ifdef NETDATA_INTERNAL_CHECKS
+ char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN];
+ uuid_unparse(ri->uuid, uuid1);
+ uuid_unparse(ri_new->uuid, uuid2);
+ internal_error(true, "RRDINSTANCE: '%s' of host '%s' changed UUID from '%s' to '%s'",
+ string2str(ri->id), rrdhost_hostname(ri->rc->rrdhost), uuid1, uuid2);
+#endif
+
uuid_copy(ri->uuid, ri_new->uuid);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UUID);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->rrdset && ri_new->rrdset && ri->rrdset != ri_new->rrdset) {
@@ -809,54 +824,56 @@ static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_un
rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING);
}
+#ifdef NETDATA_INTERNAL_CHECKS
if(ri->rrdset && uuid_compare(ri->uuid, ri->rrdset->chart_uuid) != 0) {
char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN];
uuid_unparse(ri->uuid, uuid1);
uuid_unparse(ri->rrdset->chart_uuid, uuid2);
internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), rrdset_id(ri->rrdset), uuid1, uuid2);
}
+#endif
if(ri->name != ri_new->name) {
STRING *old = ri->name;
ri->name = string_dup(ri_new->name);
string_freez(old);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->title != ri_new->title) {
STRING *old = ri->title;
ri->title = string_dup(ri_new->title);
string_freez(old);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->units != ri_new->units) {
STRING *old = ri->units;
ri->units = string_dup(ri_new->units);
string_freez(old);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->family != ri_new->family) {
STRING *old = ri->family;
ri->family = string_dup(ri_new->family);
string_freez(old);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->chart_type != ri_new->chart_type) {
ri->chart_type = ri_new->chart_type;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->priority != ri_new->priority) {
ri->priority = ri_new->priority;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->update_every != ri_new->update_every) {
ri->update_every = ri_new->update_every;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(ri->rrdset != ri_new->rrdset) {
@@ -925,11 +942,11 @@ static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function) {
if(likely(st)) {
if(unlikely((unsigned int) st->priority != ri->priority)) {
ri->priority = st->priority;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(unlikely(st->update_every != ri->update_every)) {
ri->update_every = st->update_every;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
}
else if(unlikely(rrd_flag_is_collected(ri))) {
@@ -1100,7 +1117,7 @@ static inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) {
RRDINSTANCE *ri = rrdset_get_rrdinstance(st);
if(unlikely(!ri)) return;
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION);
rrdinstance_trigger_updates(ri, __FUNCTION__ );
}
@@ -1116,7 +1133,7 @@ static inline void rrdinstance_updated_rrdset_name(RRDSET *st) {
ri->name = string_dup(st->name);
string_freez(old);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
rrdinstance_trigger_updates(ri, __FUNCTION__ );
}
}
@@ -1131,11 +1148,11 @@ static inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, R
if(unlikely(st_is_hidden != ri_is_hidden)) {
if (unlikely(st_is_hidden && !ri_is_hidden))
- rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS);
+ rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
else if (unlikely(!st_is_hidden && ri_is_hidden)) {
rrd_flag_clear(ri, RRD_FLAG_HIDDEN);
- rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS);
+ rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
}
}
@@ -1269,14 +1286,14 @@ static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unu
else
rc->title = string_2way_merge(rc->title, rc_new->title);
string_freez(old_title);
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(rc->units != rc_new->units) {
STRING *old_units = rc->units;
rc->units = string_dup(rc_new->units);
string_freez(old_units);
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(rc->family != rc_new->family) {
@@ -1286,17 +1303,17 @@ static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unu
else
rc->family = string_2way_merge(rc->family, rc_new->family);
string_freez(old_family);
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(rc->chart_type != rc_new->chart_type) {
rc->chart_type = rc_new->chart_type;
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
if(rc->priority != rc_new->priority) {
rc->priority = rc_new->priority;
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
rrd_flag_set(rc, rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on rc_new
@@ -1351,14 +1368,14 @@ static bool rrdcontext_hub_queue_conflict_callback(const DICTIONARY_ITEM *item _
static void rrdcontext_post_processing_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) {
RRDCONTEXT *rc = context;
- rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING);
+ rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP);
rc->pp.queued_flags = rc->flags;
rc->pp.queued_ut = now_realtime_usec();
}
static void rrdcontext_post_processing_queue_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) {
RRDCONTEXT *rc = context;
- rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING);
+ rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_PP);
rc->pp.dequeued_ut = now_realtime_usec();
}
@@ -1366,8 +1383,8 @@ static bool rrdcontext_post_processing_queue_conflict_callback(const DICTIONARY_
RRDCONTEXT *rc = context;
bool changed = false;
- if(!(rc->flags & RRD_FLAG_QUEUED_FOR_POST_PROCESSING)) {
- rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING);
+ if(!(rc->flags & RRD_FLAG_QUEUED_FOR_PP)) {
+ rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP);
changed = true;
}
@@ -3035,7 +3052,7 @@ static void rrdcontext_recalculate_retention_all_hosts(void) {
// ----------------------------------------------------------------------------
// garbage collector
-static void rrdmetric_update_retention(RRDMETRIC *rm) {
+static bool rrdmetric_update_retention(RRDMETRIC *rm) {
time_t min_first_time_t = LONG_MAX, max_last_time_t = 0;
if(rm->rrddim) {
@@ -3060,7 +3077,7 @@ static void rrdmetric_update_retention(RRDMETRIC *rm) {
}
else {
// cannot get retention
- return;
+ return false;
}
#endif
@@ -3090,6 +3107,8 @@ static void rrdmetric_update_retention(RRDMETRIC *rm) {
rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION);
rrd_flag_set(rm, RRD_FLAG_LIVE_RETENTION);
+
+ return true;
}
static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) {
@@ -3261,16 +3280,18 @@ static void rrdmetric_process_updates(RRDMETRIC *rm, bool force, RRD_FLAGS reaso
if(reason != RRD_FLAG_NONE)
rrd_flag_set_updated(rm, reason);
- if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION))
+ if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION))
return;
if(worker_jobs)
worker_is_busy(WORKER_JOB_PP_METRIC);
- if(reason == RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) {
+ if(reason & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) {
rrd_flag_set_archived(rm);
rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD);
}
+ if(rrd_flag_is_deleted(rm) && (reason & RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION))
+ rrd_flag_set_archived(rm);
rrdmetric_update_retention(rm);
@@ -3296,8 +3317,8 @@ static void rrdinstance_post_process_updates(RRDINSTANCE *ri, bool force, RRD_FL
if(unlikely(netdata_exit)) break;
RRD_FLAGS reason_to_pass = reason;
- if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION))
- reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION;
+ if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION))
+ reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION;
rrdmetric_process_updates(rm, force, reason_to_pass, worker_jobs);
@@ -3403,8 +3424,8 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
if(unlikely(netdata_exit)) break;
RRD_FLAGS reason_to_pass = reason;
- if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION))
- reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION;
+ if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION))
+ reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION;
rrdinstance_post_process_updates(ri, force, reason_to_pass, worker_jobs);
@@ -3517,7 +3538,7 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
if (min_priority != LONG_MAX && rc->priority != min_priority) {
rc->priority = min_priority;
- rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY);
+ rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA);
}
}
@@ -3536,7 +3557,7 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG
static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function __maybe_unused, RRD_FLAGS flags __maybe_unused) {
if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return;
- if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING)) {
+ if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_PP)) {
dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue,
string2str(rc->id),
rc,
diff --git a/database/rrddim.c b/database/rrddim.c
index 17e65991f0..06bfef7d72 100644
--- a/database/rrddim.c
+++ b/database/rrddim.c
@@ -84,6 +84,23 @@ static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v
if (unlikely(rrdcontext_find_dimension_uuid(st, rrddim_id(rd), &(rd->metric_uuid)))) {
uuid_generate(rd->metric_uuid);
+ bool found_in_sql = false; (void)found_in_sql;
+
+// bool found_in_sql = true;
+// if(unlikely(sql_find_dimension_uuid(st, rd, &rd->metric_uuid))) {
+// found_in_sql = false;
+// uuid_generate(rd->metric_uuid);
+// }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ char uuid_str[UUID_STR_LEN];
+ uuid_unparse_lower(rd->metric_uuid, uuid_str);
+ error_report("Dimension UUID for host %s chart [%s] dimension [%s] not found in context. It is now set to %s (%s)",
+ string2str(host->hostname),
+ string2str(st->name),
+ string2str(rd->name),
+ uuid_str, found_in_sql ? "found in sqlite" : "newly generated");
+#endif
}
// initialize the db tiers
diff --git a/database/rrdhost.c b/database/rrdhost.c
index 1027d94eda..d88a215f2c 100644
--- a/database/rrdhost.c
+++ b/database/rrdhost.c
@@ -4,7 +4,7 @@
#include "rrd.h"
bool dbengine_enabled = false; // will become true if and when dbengine is initialized
-size_t storage_tiers = 1;
+size_t storage_tiers = 3;
size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 };
RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW };
@@ -290,7 +290,11 @@ int is_legacy = 1;
host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching);
}
- host->rrdpush_enable_replication = rrdpush_enable_replication;
+ if(rrdpush_enable_replication)
+ rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
+ else
+ rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
+
host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
host->rrdpush_replication_step = rrdpush_replication_step;
@@ -616,11 +620,14 @@ void rrdhost_update(RRDHOST *host
rrdcalctemplate_index_init(host);
rrdcalc_rrdhost_index_init(host);
- host->rrdpush_enable_replication = rrdpush_enable_replication;
+ if(rrdpush_enable_replication)
+ rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION);
+ else
+ rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION);
+
host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate;
host->rrdpush_replic