diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2022-11-20 23:47:53 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-20 23:47:53 +0200 |
commit | 284f6f3aa4f36cefad2601c490510621496c2b53 (patch) | |
tree | 97a7d55627ef7477f431c53a20d0e6f1f738a419 /database | |
parent | 2d02484954f68bf7e3015cb649e2f10a9f3c5c95 (diff) |
streaming compression, query planner and replication fixes (#14023)
* streaming compression, query planner and replication fixes
* remove journal v2 stats from global statistics
* disable sql for checking past sql UUIDs
* single threaded replication
* final replication thread using dictionaries and JudyL for sorting the pending requests
* do not timeout the sending socket when there are pending replication requests
* streaming receiver using read() instead of fread()
* remove FILE * from streaming - now using posix read() and write()
* increase timeouts to 10 minutes
* apply sender timeout only when there are metrics that are supposed to be streamed
* error handling in replication
* remove retries on socket read timeout; better error messages
* take into account inbound traffic too to detect that a connection is stale
* remove race conditions from replication thread
* make sure deleted entries are marked as executed, so that even if deletion fails, they will not be executed
* 2 minutes timeout to retry streaming to a parent that already has this node
* remove unnecessary condition check
* fix compilation warnings
* include judy in replication
* wrappers to handle retries for SSL_read and SSL_write
* compressed bytes read monitoring
* recursive locks on replication to make it faster during flush or cleanup
* replication completion chart at the receiver side
* simplified recursive mutex
* simplified recursive mutex again
Diffstat (limited to 'database')
-rw-r--r-- | database/rrd.h | 15 | ||||
-rw-r--r-- | database/rrdcalc.c | 22 | ||||
-rw-r--r-- | database/rrdcontext.c | 203 | ||||
-rw-r--r-- | database/rrddim.c | 17 | ||||
-rw-r--r-- | database/rrdhost.c | 64 | ||||
-rw-r--r-- | database/rrdset.c | 15 | ||||
-rw-r--r-- | database/sqlite/sqlite_functions.c | 109 | ||||
-rw-r--r-- | database/sqlite/sqlite_functions.h | 5 |
8 files changed, 327 insertions, 123 deletions
diff --git a/database/rrd.h b/database/rrd.h index 0069715b90..b548aa7169 100644 --- a/database/rrd.h +++ b/database/rrd.h @@ -55,6 +55,7 @@ struct pg_cache_page_index; #include "sqlite/sqlite_health.h" #include "rrdcontext.h" +extern bool unittest_running; extern bool dbengine_enabled; extern size_t storage_tiers; extern size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS]; @@ -533,8 +534,9 @@ typedef enum rrdset_flags { RRDSET_FLAG_SENDER_REPLICATION_FINISHED = (1 << 22), // the sending side has completed replication RRDSET_FLAG_RECEIVER_REPLICATION_FINISHED = (1 << 23), // the receiving side has completed replication + RRDSET_FLAG_RECEIVER_REPLICATION_IN_PROGRESS = (1 << 24), // the receiving side has replication in progress - RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 24), // a custom variable has been updated and needs to be exposed to parent + RRDSET_FLAG_UPSTREAM_SEND_VARIABLES = (1 << 25), // a custom variable has been updated and needs to be exposed to parent } RRDSET_FLAGS; #define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag)) @@ -658,6 +660,14 @@ struct rrdset { netdata_rwlock_t rwlock; // protection for RRDCALC *base RRDCALC *base; // double linked list of RRDCALC related to this RRDSET } alerts; + +#ifdef NETDATA_INTERNAL_CHECKS + struct { + bool start_streaming; + time_t after; + time_t before; + } replay; +#endif }; #define rrdset_plugin_name(st) string2str((st)->plugin_name) @@ -757,6 +767,8 @@ typedef enum { // Configuration options RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS = (1 << 3), // delete files of obsolete charts RRDHOST_OPTION_DELETE_ORPHAN_HOST = (1 << 4), // delete the entire host when orphan + + RRDHOST_OPTION_REPLICATION = (1 << 5), // when set, we support replication for this host } RRDHOST_OPTIONS; #define rrdhost_option_check(host, flag) ((host)->options & (flag)) @@ -937,7 +949,6 @@ struct rrdhost { struct rrdpush_destinations *destination; // the current destination from the above list 
SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent - bool rrdpush_enable_replication; // enable replication time_t rrdpush_seconds_to_replicate; // max time we want to replicate from the child time_t rrdpush_replication_step; // seconds per replication step diff --git a/database/rrdcalc.c b/database/rrdcalc.c index 41c62e4f6e..aad945a907 100644 --- a/database/rrdcalc.c +++ b/database/rrdcalc.c @@ -408,6 +408,8 @@ struct rrdcalc_constructor { RRDCALC_REACT_NONE, RRDCALC_REACT_NEW, } react_action; + + bool existing_from_template; }; static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { @@ -543,6 +545,20 @@ static void rrdcalc_rrdhost_insert_callback(const DICTIONARY_ITEM *item __maybe_ ctr->react_action = RRDCALC_REACT_NEW; } +static bool rrdcalc_rrdhost_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *rrdcalc_new __maybe_unused, void *constructor_data ) { + RRDCALC *rc = rrdcalc; + struct rrdcalc_constructor *ctr = constructor_data; + + if(rc->run_flags & RRDCALC_FLAG_FROM_TEMPLATE) + ctr->existing_from_template = true; + else + ctr->existing_from_template = false; + + ctr->react_action = RRDCALC_REACT_NONE; + + return false; +} + static void rrdcalc_rrdhost_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrdcalc, void *constructor_data) { RRDCALC *rc = rrdcalc; struct rrdcalc_constructor *ctr = constructor_data; @@ -612,6 +628,7 @@ void rrdcalc_rrdhost_index_init(RRDHOST *host) { host->rrdcalc_root_index = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); dictionary_register_insert_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_insert_callback, NULL); + dictionary_register_conflict_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_conflict_callback, NULL); dictionary_register_react_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_react_callback, NULL); 
dictionary_register_delete_callback(host->rrdcalc_root_index, rrdcalc_rrdhost_delete_callback, host); } @@ -635,11 +652,12 @@ void rrdcalc_add_from_rrdcalctemplate(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET .overwrite_alert_name = overwrite_alert_name, .overwrite_dimensions = overwrite_dimensions, .react_action = RRDCALC_REACT_NONE, + .existing_from_template = false, }; dictionary_set_advanced(host->rrdcalc_root_index, key, (ssize_t)(key_len + 1), NULL, sizeof(RRDCALC), &tmp); - if(tmp.react_action != RRDCALC_REACT_NEW) - error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It already exists.", + if(tmp.react_action != RRDCALC_REACT_NEW && tmp.existing_from_template == false) + error("RRDCALC: from template '%s' on chart '%s' with key '%s', failed to be added to host '%s'. It is manually configured.", string2str(rt->name), rrdset_id(st), key, rrdhost_hostname(host)); } diff --git a/database/rrdcontext.c b/database/rrdcontext.c index 4e3751e92b..cfa8af3e04 100644 --- a/database/rrdcontext.c +++ b/database/rrdcontext.c @@ -38,7 +38,7 @@ typedef enum { RRD_FLAG_OWN_LABELS = (1 << 4), // this instance has its own labels - not linked to an RRDSET RRD_FLAG_LIVE_RETENTION = (1 << 5), // we have got live retention from the database RRD_FLAG_QUEUED_FOR_HUB = (1 << 6), // this context is currently queued to be dispatched to hub - RRD_FLAG_QUEUED_FOR_POST_PROCESSING = (1 << 7), // this context is currently queued to be post-processed + RRD_FLAG_QUEUED_FOR_PP = (1 << 7), // this context is currently queued to be post-processed RRD_FLAG_HIDDEN = (1 << 8), // don't expose this to the hub or the API RRD_FLAG_UPDATE_REASON_TRIGGERED = (1 << 9), // the update was triggered by the child object @@ -46,24 +46,18 @@ typedef enum { RRD_FLAG_UPDATE_REASON_NEW_OBJECT = (1 << 11), // this object has just been created RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT = (1 << 12), // we received an update on this object RRD_FLAG_UPDATE_REASON_CHANGED_LINKING = 
(1 << 13), // an instance or a metric switched RRDSET or RRDDIM - RRD_FLAG_UPDATE_REASON_CHANGED_UUID = (1 << 14), // an instance or a metric changed UUID - RRD_FLAG_UPDATE_REASON_CHANGED_NAME = (1 << 15), // an instance or a metric changed name - RRD_FLAG_UPDATE_REASON_CHANGED_UNITS = (1 << 16), // this context or instance changed units - RRD_FLAG_UPDATE_REASON_CHANGED_TITLE = (1 << 17), // this context or instance changed title - RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY = (1 << 18), // the context or the instance changed family - RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE = (1 << 19), // this context or instance changed chart type - RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY = (1 << 20), // this context or instance changed its priority - RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY = (1 << 21), // the instance or the metric changed update frequency - RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 22), // this object has not retention - RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 23), // this object changed its oldest time in the db - RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 24), // this object change its latest time in the db - RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 25), // this object has stopped being collected - RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 26), // this object has started being collected - RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 27), // this context belongs to a host that just disconnected - RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 28), // this context changed because of a db rotation - RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 29), // this context is not used anymore - RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS = (1 << 30), // this context is not used anymore - RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION = (1 << 31), // this object has updated retention + RRD_FLAG_UPDATE_REASON_CHANGED_METADATA = (1 << 14), // this context or instance changed uuid, name, units, title, family, chart type, priority, update 
every, rrd changed flags + RRD_FLAG_UPDATE_REASON_ZERO_RETENTION = (1 << 15), // this object has no retention + RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T = (1 << 16), // this object changed its oldest time in the db + RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T = (1 << 17), // this object change its latest time in the db + RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED = (1 << 18), // this object has stopped being collected + RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED = (1 << 19), // this object has started being collected + RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD = (1 << 20), // this context belongs to a host that just disconnected + RRD_FLAG_UPDATE_REASON_UNUSED = (1 << 21), // this context is not used anymore + RRD_FLAG_UPDATE_REASON_DB_ROTATION = (1 << 22), // this context changed because of a db rotation + + // action to perform on an object + RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION = (1 << 30), // this object has to update its retention from the db } RRD_FLAGS; #define RRD_FLAG_ALL_UPDATE_REASONS ( \ @@ -72,14 +66,7 @@ typedef enum { |RRD_FLAG_UPDATE_REASON_NEW_OBJECT \ |RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT \ |RRD_FLAG_UPDATE_REASON_CHANGED_LINKING \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UUID \ - |RRD_FLAG_UPDATE_REASON_CHANGED_NAME \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UNITS \ - |RRD_FLAG_UPDATE_REASON_CHANGED_TITLE \ - |RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY \ - |RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE \ - |RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY \ - |RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY \ + |RRD_FLAG_UPDATE_REASON_CHANGED_METADATA \ |RRD_FLAG_UPDATE_REASON_ZERO_RETENTION \ |RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T \ |RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T \ @@ -88,7 +75,6 @@ typedef enum { |RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD \ |RRD_FLAG_UPDATE_REASON_DB_ROTATION \ |RRD_FLAG_UPDATE_REASON_UNUSED \ - |RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS \ ) #define RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS ( \ @@ -105,7 +91,7 @@ typedef enum { 
#define RRD_FLAGS_PREVENTING_DELETIONS ( \ RRD_FLAG_QUEUED_FOR_HUB \ |RRD_FLAG_COLLECTED \ - |RRD_FLAG_QUEUED_FOR_POST_PROCESSING \ + |RRD_FLAG_QUEUED_FOR_PP \ ) // get all the flags of an object @@ -203,34 +189,26 @@ static struct rrdcontext_reason { usec_t delay_ut; } rrdcontext_reasons[] = { // context related - { RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_TITLE, "changed title", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_UNITS, "changed units", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY, "changed family", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY, "changed priority", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE, "changed chart type", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_TRIGGERED, "triggered transition", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_NEW_OBJECT, "object created", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATED_OBJECT, "object updated", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_LOAD_SQL, "loaded from sql", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_METADATA, "changed metadata", 65 * USEC_PER_SEC }, + 
{RRD_FLAG_UPDATE_REASON_ZERO_RETENTION, "has no retention", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_FIRST_TIME_T, "updated first_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LAST_TIME_T, "updated last_time_t", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STOPPED_BEING_COLLECTED, "stopped collected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_STARTED_BEING_COLLECTED, "started collected", 5 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UNUSED, "unused", 5 * USEC_PER_SEC }, // not context related - { RRD_FLAG_UPDATE_REASON_CHANGED_UUID, "changed uuid", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY, "changed updated every",65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_NAME, "changed name", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS, "changed flags", 65 * USEC_PER_SEC }, - { RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION, "updated retention", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_CHANGED_LINKING, "changed rrd link", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD, "child disconnected", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_DB_ROTATION, "db rotation", 65 * USEC_PER_SEC }, + {RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION, "updated retention", 65 * USEC_PER_SEC }, // terminator - { 0, NULL, 0 }, + {0, NULL, 0 }, }; @@ -320,7 +298,7 @@ typedef struct rrdcontext { // ---------------------------------------------------------------------------- // helper one-liners for RRDMETRIC -static void rrdmetric_update_retention(RRDMETRIC *rm); +static bool rrdmetric_update_retention(RRDMETRIC *rm); static inline RRDMETRIC *rrdmetric_acquired_value(RRDMETRIC_ACQUIRED *rma) { return dictionary_acquired_item_value((DICTIONARY_ITEM 
*)rma); @@ -472,7 +450,7 @@ static void rrd_flags_to_buffer(RRD_FLAGS flags, BUFFER *wb) { if(flags & RRD_FLAG_HIDDEN) buffer_strcat(wb, "HIDDEN "); - if(flags & RRD_FLAG_QUEUED_FOR_POST_PROCESSING) + if(flags & RRD_FLAG_QUEUED_FOR_PP) buffer_strcat(wb, "PENDING_UPDATES "); } @@ -538,12 +516,39 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus string2str(rm->id), string2str(rm_new->id)); if(uuid_compare(rm->uuid, rm_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; uuid_unparse(rm->uuid, uuid1); uuid_unparse(rm_new->uuid, uuid2); - internal_error(true, "RRDMETRIC: '%s' of instance '%s' changed uuid from '%s' to '%s'", string2str(rm->id), string2str(rm->ri->id), uuid1, uuid2); + + time_t old_first_time_t = 0; + time_t old_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + old_first_time_t = rm->first_time_t; + old_last_time_t = rm->last_time_t; + } + + uuid_copy(rm->uuid, rm_new->uuid); + + time_t new_first_time_t = 0; + time_t new_last_time_t = 0; + if(rrdmetric_update_retention(rm)) { + new_first_time_t = rm->first_time_t; + new_last_time_t = rm->last_time_t; + } + + internal_error(true, + "RRDMETRIC: '%s' of instance '%s' of host '%s' changed UUID from '%s' (retention %ld to %ld, %ld secs) to '%s' (retention %ld to %ld, %ld secs)" + , string2str(rm->id) + , string2str(rm->ri->id) + , rrdhost_hostname(rm->ri->rc->rrdhost) + , uuid1, old_first_time_t, old_last_time_t, old_last_time_t - old_first_time_t + , uuid2, new_first_time_t, new_last_time_t, new_last_time_t - new_first_time_t + ); +#else uuid_copy(rm->uuid, rm_new->uuid); - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_UUID); +#endif + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rm->rrddim && rm_new->rrddim && rm->rrddim != rm_new->rrddim) { @@ -551,12 +556,14 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus rrd_flag_set_updated(rm, 
RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); } +#ifdef NETDATA_INTERNAL_CHECKS if(rm->rrddim && uuid_compare(rm->uuid, rm->rrddim->metric_uuid) != 0) { char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; uuid_unparse(rm->uuid, uuid1); uuid_unparse(rm_new->uuid, uuid2); internal_error(true, "RRDMETRIC: '%s' is linked to RRDDIM '%s' but they have different UUIDs. RRDMETRIC has '%s', RRDDIM has '%s'", string2str(rm->id), rrddim_id(rm->rrddim), uuid1, uuid2); } +#endif if(rm->rrddim != rm_new->rrddim) rm->rrddim = rm_new->rrddim; @@ -565,7 +572,7 @@ static bool rrdmetric_conflict_callback(const DICTIONARY_ITEM *item __maybe_unus STRING *old = rm->name; rm->name = string_dup(rm_new->name); string_freez(old); - rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); + rrd_flag_set_updated(rm, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(!rm->first_time_t || (rm_new->first_time_t && rm_new->first_time_t < rm->first_time_t)) { @@ -800,8 +807,16 @@ static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_un string2str(ri->id), string2str(ri_new->id)); if(uuid_compare(ri->uuid, ri_new->uuid) != 0) { +#ifdef NETDATA_INTERNAL_CHECKS + char uuid1[UUID_STR_LEN], uuid2[UUID_STR_LEN]; + uuid_unparse(ri->uuid, uuid1); + uuid_unparse(ri_new->uuid, uuid2); + internal_error(true, "RRDINSTANCE: '%s' of host '%s' changed UUID from '%s' to '%s'", + string2str(ri->id), rrdhost_hostname(ri->rc->rrdhost), uuid1, uuid2); +#endif + uuid_copy(ri->uuid, ri_new->uuid); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UUID); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->rrdset && ri_new->rrdset && ri->rrdset != ri_new->rrdset) { @@ -809,54 +824,56 @@ static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_un rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_LINKING); } +#ifdef NETDATA_INTERNAL_CHECKS if(ri->rrdset && uuid_compare(ri->uuid, ri->rrdset->chart_uuid) != 0) { char uuid1[UUID_STR_LEN], 
uuid2[UUID_STR_LEN]; uuid_unparse(ri->uuid, uuid1); uuid_unparse(ri->rrdset->chart_uuid, uuid2); internal_error(true, "RRDINSTANCE: '%s' is linked to RRDSET '%s' but they have different UUIDs. RRDINSTANCE has '%s', RRDSET has '%s'", string2str(ri->id), rrdset_id(ri->rrdset), uuid1, uuid2); } +#endif if(ri->name != ri_new->name) { STRING *old = ri->name; ri->name = string_dup(ri_new->name); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->title != ri_new->title) { STRING *old = ri->title; ri->title = string_dup(ri_new->title); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->units != ri_new->units) { STRING *old = ri->units; ri->units = string_dup(ri_new->units); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->family != ri_new->family) { STRING *old = ri->family; ri->family = string_dup(ri_new->family); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->chart_type != ri_new->chart_type) { ri->chart_type = ri_new->chart_type; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->priority != ri_new->priority) { ri->priority = ri_new->priority; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->update_every != ri_new->update_every) { ri->update_every = ri_new->update_every; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(ri->rrdset != 
ri_new->rrdset) { @@ -925,11 +942,11 @@ static void rrdinstance_trigger_updates(RRDINSTANCE *ri, const char *function) { if(likely(st)) { if(unlikely((unsigned int) st->priority != ri->priority)) { ri->priority = st->priority; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(unlikely(st->update_every != ri->update_every)) { ri->update_every = st->update_every; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_UPDATE_EVERY); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } } else if(unlikely(rrd_flag_is_collected(ri))) { @@ -1100,7 +1117,7 @@ static inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) { RRDINSTANCE *ri = rrdset_get_rrdinstance(st); if(unlikely(!ri)) return; - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION); rrdinstance_trigger_updates(ri, __FUNCTION__ ); } @@ -1116,7 +1133,7 @@ static inline void rrdinstance_updated_rrdset_name(RRDSET *st) { ri->name = string_dup(st->name); string_freez(old); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_NAME); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); rrdinstance_trigger_updates(ri, __FUNCTION__ ); } } @@ -1131,11 +1148,11 @@ static inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, R if(unlikely(st_is_hidden != ri_is_hidden)) { if (unlikely(st_is_hidden && !ri_is_hidden)) - rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS); + rrd_flag_set_updated(ri, RRD_FLAG_HIDDEN | RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); else if (unlikely(!st_is_hidden && ri_is_hidden)) { rrd_flag_clear(ri, RRD_FLAG_HIDDEN); - rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_FLAGS); + rrd_flag_set_updated(ri, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } } } @@ -1269,14 +1286,14 @@ static bool rrdcontext_conflict_callback(const 
DICTIONARY_ITEM *item __maybe_unu else rc->title = string_2way_merge(rc->title, rc_new->title); string_freez(old_title); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_TITLE); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->units != rc_new->units) { STRING *old_units = rc->units; rc->units = string_dup(rc_new->units); string_freez(old_units); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_UNITS); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->family != rc_new->family) { @@ -1286,17 +1303,17 @@ static bool rrdcontext_conflict_callback(const DICTIONARY_ITEM *item __maybe_unu else rc->family = string_2way_merge(rc->family, rc_new->family); string_freez(old_family); - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_FAMILY); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->chart_type != rc_new->chart_type) { rc->chart_type = rc_new->chart_type; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_CHART_TYPE); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } if(rc->priority != rc_new->priority) { rc->priority = rc_new->priority; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } rrd_flag_set(rc, rc_new->flags & RRD_FLAGS_ALLOWED_EXTERNALLY_ON_NEW_OBJECTS); // no need for atomics on rc_new @@ -1351,14 +1368,14 @@ static bool rrdcontext_hub_queue_conflict_callback(const DICTIONARY_ITEM *item _ static void rrdcontext_post_processing_queue_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *context, void *nothing __maybe_unused) { RRDCONTEXT *rc = context; - rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING); + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); rc->pp.queued_flags = rc->flags; rc->pp.queued_ut = now_realtime_usec(); } static void rrdcontext_post_processing_queue_delete_callback(const DICTIONARY_ITEM *item 
__maybe_unused, void *context, void *nothing __maybe_unused) { RRDCONTEXT *rc = context; - rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING); + rrd_flag_clear(rc, RRD_FLAG_QUEUED_FOR_PP); rc->pp.dequeued_ut = now_realtime_usec(); } @@ -1366,8 +1383,8 @@ static bool rrdcontext_post_processing_queue_conflict_callback(const DICTIONARY_ RRDCONTEXT *rc = context; bool changed = false; - if(!(rc->flags & RRD_FLAG_QUEUED_FOR_POST_PROCESSING)) { - rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING); + if(!(rc->flags & RRD_FLAG_QUEUED_FOR_PP)) { + rrd_flag_set(rc, RRD_FLAG_QUEUED_FOR_PP); changed = true; } @@ -3035,7 +3052,7 @@ static void rrdcontext_recalculate_retention_all_hosts(void) { // ---------------------------------------------------------------------------- // garbage collector -static void rrdmetric_update_retention(RRDMETRIC *rm) { +static bool rrdmetric_update_retention(RRDMETRIC *rm) { time_t min_first_time_t = LONG_MAX, max_last_time_t = 0; if(rm->rrddim) { @@ -3060,7 +3077,7 @@ static void rrdmetric_update_retention(RRDMETRIC *rm) { } else { // cannot get retention - return; + return false; } #endif @@ -3090,6 +3107,8 @@ static void rrdmetric_update_retention(RRDMETRIC *rm) { rrd_flag_set_deleted(rm, RRD_FLAG_UPDATE_REASON_ZERO_RETENTION); rrd_flag_set(rm, RRD_FLAG_LIVE_RETENTION); + + return true; } static inline bool rrdmetric_should_be_deleted(RRDMETRIC *rm) { @@ -3261,16 +3280,18 @@ static void rrdmetric_process_updates(RRDMETRIC *rm, bool force, RRD_FLAGS reaso if(reason != RRD_FLAG_NONE) rrd_flag_set_updated(rm, reason); - if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION)) + if(!force && !rrd_flag_is_updated(rm) && rrd_flag_check(rm, RRD_FLAG_LIVE_RETENTION) && !rrd_flag_check(rm, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) return; if(worker_jobs) worker_is_busy(WORKER_JOB_PP_METRIC); - if(reason == RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) { + 
if(reason & RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD) { rrd_flag_set_archived(rm); rrd_flag_set(rm, RRD_FLAG_UPDATE_REASON_DISCONNECTED_CHILD); } + if(rrd_flag_is_deleted(rm) && (reason & RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + rrd_flag_set_archived(rm); rrdmetric_update_retention(rm); @@ -3296,8 +3317,8 @@ static void rrdinstance_post_process_updates(RRDINSTANCE *ri, bool force, RRD_FL if(unlikely(netdata_exit)) break; RRD_FLAGS reason_to_pass = reason; - if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION)) - reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION; + if(rrd_flag_check(ri, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; rrdmetric_process_updates(rm, force, reason_to_pass, worker_jobs); @@ -3403,8 +3424,8 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG if(unlikely(netdata_exit)) break; RRD_FLAGS reason_to_pass = reason; - if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION)) - reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATED_RETENTION; + if(rrd_flag_check(rc, RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION)) + reason_to_pass |= RRD_FLAG_UPDATE_REASON_UPDATE_RETENTION; rrdinstance_post_process_updates(ri, force, reason_to_pass, worker_jobs); @@ -3517,7 +3538,7 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG if (min_priority != LONG_MAX && rc->priority != min_priority) { rc->priority = min_priority; - rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_PRIORITY); + rrd_flag_set_updated(rc, RRD_FLAG_UPDATE_REASON_CHANGED_METADATA); } } @@ -3536,7 +3557,7 @@ static void rrdcontext_post_process_updates(RRDCONTEXT *rc, bool force, RRD_FLAG static void rrdcontext_queue_for_post_processing(RRDCONTEXT *rc, const char *function __maybe_unused, RRD_FLAGS flags __maybe_unused) { if(unlikely(!rc->rrdhost->rrdctx_post_processing_queue)) return; - if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_POST_PROCESSING)) { + 
if(!rrd_flag_check(rc, RRD_FLAG_QUEUED_FOR_PP)) { dictionary_set((DICTIONARY *)rc->rrdhost->rrdctx_post_processing_queue, string2str(rc->id), rc, diff --git a/database/rrddim.c b/database/rrddim.c index 17e65991f0..06bfef7d72 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -84,6 +84,23 @@ static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v if (unlikely(rrdcontext_find_dimension_uuid(st, rrddim_id(rd), &(rd->metric_uuid)))) { uuid_generate(rd->metric_uuid); + bool found_in_sql = false; (void)found_in_sql; + +// bool found_in_sql = true; +// if(unlikely(sql_find_dimension_uuid(st, rd, &rd->metric_uuid))) { +// found_in_sql = false; +// uuid_generate(rd->metric_uuid); +// } + +#ifdef NETDATA_INTERNAL_CHECKS + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(rd->metric_uuid, uuid_str); + error_report("Dimension UUID for host %s chart [%s] dimension [%s] not found in context. It is now set to %s (%s)", + string2str(host->hostname), + string2str(st->name), + string2str(rd->name), + uuid_str, found_in_sql ? 
"found in sqlite" : "newly generated"); +#endif } // initialize the db tiers diff --git a/database/rrdhost.c b/database/rrdhost.c index 1027d94eda..d88a215f2c 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -4,7 +4,7 @@ #include "rrd.h" bool dbengine_enabled = false; // will become true if and when dbengine is initialized -size_t storage_tiers = 1; +size_t storage_tiers = 3; size_t storage_tiers_grouping_iterations[RRD_STORAGE_TIERS] = { 1, 60, 60, 60, 60 }; RRD_BACKFILL storage_tiers_backfill[RRD_STORAGE_TIERS] = { RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW, RRD_BACKFILL_NEW }; @@ -290,7 +290,11 @@ int is_legacy = 1; host, rrdpush_enabled, rrdpush_destination, rrdpush_api_key, rrdpush_send_charts_matching); } - host->rrdpush_enable_replication = rrdpush_enable_replication; + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; host->rrdpush_replication_step = rrdpush_replication_step; @@ -616,11 +620,14 @@ void rrdhost_update(RRDHOST *host rrdcalctemplate_index_init(host); rrdcalc_rrdhost_index_init(host); - host->rrdpush_enable_replication = rrdpush_enable_replication; + if(rrdpush_enable_replication) + rrdhost_option_set(host, RRDHOST_OPTION_REPLICATION); + else + rrdhost_option_clear(host, RRDHOST_OPTION_REPLICATION); + host->rrdpush_seconds_to_replicate = rrdpush_seconds_to_replicate; host->rrdpush_replic |