summary | refs | log | tree | commit | diff | stats
path: root/database
diff options
context:
space:
mode:
author Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> 2020-06-19 11:31:14 +0300
committer GitHub <noreply@github.com> 2020-06-19 11:31:14 +0300
commit 51cff0660b0eb671dbec42e54d7567db8b6ba712 (patch)
tree f97d2df30f94fdd2bc93031d30825c03e6135da1 /database
parent 8733f7c1bd737c11dd3e6ee9a9f65199d470cf5a (diff)
Improved error handling and recovery during compaction and metadata log replay (#9354)
* Improved error handling and recovery during compaction and metadata log replay
Diffstat (limited to 'database')
-rw-r--r-- database/engine/metadata_log/compaction.c 14
-rw-r--r-- database/engine/metadata_log/logfile.c 7
-rw-r--r-- database/engine/metadata_log/metadatalog.c 9
-rw-r--r-- database/engine/metadata_log/metadatalog.h 2
-rwxr-xr-x database/engine/metadata_log/metadatalogapi.c 28
-rwxr-xr-x database/engine/metadata_log/metalogpluginsd.c 79
-rwxr-xr-x database/engine/rrdengineapi.c 12
7 files changed, 98 insertions, 53 deletions
diff --git a/database/engine/metadata_log/compaction.c b/database/engine/metadata_log/compaction.c
index 951cebdc04..f7c3f1b4a4 100644
--- a/database/engine/metadata_log/compaction.c
+++ b/database/engine/metadata_log/compaction.c
@@ -87,7 +87,7 @@ static void compact_record_by_uuid(struct metalog_instance *ctx, uuid_t *uuid)
ret = find_object_by_guid(uuid, NULL, 0);
switch (ret) {
case GUID_TYPE_CHAR:
- fatal_assert(0);
+ error_with_guid(uuid, "Ignoring unexpected type GUID_TYPE_CHAR");
break;
case GUID_TYPE_CHART:
st = metalog_get_chart_from_uuid(ctx, uuid);
@@ -106,7 +106,12 @@ static void compact_record_by_uuid(struct metalog_instance *ctx, uuid_t *uuid)
case GUID_TYPE_DIMENSION:
rd = metalog_get_dimension_from_uuid(ctx, uuid);
if (rd) {
- if (ctx->current_compaction_id > rd->state->compaction_id) {
+ if (ctx->current_compaction_id > rd->rrdset->compaction_id) {
+ error("Forcing compaction of chart %s", rd->rrdset->id);
+ rd->rrdset->compaction_id = ctx->current_compaction_id;
+ buffer = metalog_update_chart_buffer(rd->rrdset, ctx->current_compaction_id);
+ metalog_commit_record(ctx, buffer, METALOG_COMMIT_CREATION_RECORD, rd->rrdset->chart_uuid, 1);
+ } else if (ctx->current_compaction_id > rd->state->compaction_id) {
rd->state->compaction_id = ctx->current_compaction_id;
buffer = metalog_update_dimension_buffer(rd);
metalog_commit_record(ctx, buffer, METALOG_COMMIT_CREATION_RECORD, uuid, 1);
@@ -129,8 +134,11 @@ static void compact_record_by_uuid(struct metalog_instance *ctx, uuid_t *uuid)
case GUID_TYPE_NOTFOUND:
debug(D_METADATALOG, "Ignoring nonexistent metadata record.");
break;
+ case GUID_TYPE_NOSPACE:
+ error_with_guid(uuid, "Not enough space for object retrieval");
+ break;
default:
- fatal_assert(0);
+ error("Unknown return code %u from find_object_by_guid", ret);
break;
}
}
diff --git a/database/engine/metadata_log/logfile.c b/database/engine/metadata_log/logfile.c
index 117c5e5eee..27a91e5ef2 100644
--- a/database/engine/metadata_log/logfile.c
+++ b/database/engine/metadata_log/logfile.c
@@ -524,13 +524,6 @@ static void iterate_records(struct metadata_logfile *metalogfile)
pos);
replay_record(metalogfile, header, buf + header->header_length);
- if (!uuid_is_null(state->uuid)) { /* It's a valid object */
- struct metalog_record record;
-
- uuid_copy(record.uuid, state->uuid);
- mlf_record_insert(metalogfile, &record);
- uuid_clear(state->uuid); /* Clear state for parsing of next record */
- }
}
freez(buf);
diff --git a/database/engine/metadata_log/metadatalog.c b/database/engine/metadata_log/metadatalog.c
index 6ebc2185ea..e163ff8bcc 100644
--- a/database/engine/metadata_log/metadatalog.c
+++ b/database/engine/metadata_log/metadatalog.c
@@ -406,3 +406,12 @@ error_after_loop_init:
/* wake up initialization thread */
complete(&ctx->metalog_completion);
}
+
+void error_with_guid(uuid_t *uuid, char *reason) /* Log `reason` followed by the lower-case text form of `uuid`. */
+{
+ char uuid_str[37]; /* 36-character UUID text plus the terminating NUL */
+
+ uuid_unparse_lower(*uuid, uuid_str);
+ errno = 0; /* NOTE(review): presumably keeps error() from appending a stale errno description - confirm */
+ error("%s (GUID = %s)", reason, uuid_str);
+} \ No newline at end of file
diff --git a/database/engine/metadata_log/metadatalog.h b/database/engine/metadata_log/metadatalog.h
index 004eec7ee3..bd2b0d9914 100644
--- a/database/engine/metadata_log/metadatalog.h
+++ b/database/engine/metadata_log/metadatalog.h
@@ -132,5 +132,5 @@ extern void metalog_test_quota(struct metalog_worker_config *wc);
extern void metalog_worker(void* arg);
extern void metalog_enq_cmd(struct metalog_worker_config *wc, struct metalog_cmd *cmd);
extern struct metalog_cmd metalog_deq_cmd(struct metalog_worker_config *wc);
-
+extern void error_with_guid(uuid_t *uuid, char *reason);
#endif /* NETDATA_METADATALOG_H */
diff --git a/database/engine/metadata_log/metadatalogapi.c b/database/engine/metadata_log/metadatalogapi.c
index e8a5d27442..5c8fb5e5ff 100755
--- a/database/engine/metadata_log/metadatalogapi.c
+++ b/database/engine/metadata_log/metadatalogapi.c
@@ -273,16 +273,21 @@ RRDSET *metalog_get_chart_from_uuid(struct metalog_instance *ctx, uuid_t *chart_
uuid_t *machine_guid, *chart_char_guid;
ret = find_object_by_guid(chart_uuid, chart_object, 33);
- fatal_assert(GUID_TYPE_CHART == ret);
+ if (unlikely(GUID_TYPE_CHART != ret))
+ return NULL;
machine_guid = (uuid_t *)chart_object;
RRDHOST *host = ctx->rrdeng_ctx->host;
- fatal_assert(!uuid_compare(host->host_uuid, *machine_guid));
+ if (unlikely(uuid_compare(host->host_uuid, *machine_guid))) {
+ error("Metadata host machine GUID does not match the one assosiated with the chart");
+ return NULL;
+ }
chart_char_guid = (uuid_t *)(chart_object + 16);
ret = find_object_by_guid(chart_char_guid, chart_fullid, RRD_ID_LENGTH_MAX + 1);
- fatal_assert(GUID_TYPE_CHAR == ret);
+ if (unlikely(GUID_TYPE_CHAR != ret))
+ return NULL;
RRDSET *st = rrdset_find(host, chart_fullid);
return st;
@@ -300,22 +305,29 @@ RRDDIM *metalog_get_dimension_from_uuid(struct metalog_instance *ctx, uuid_t *me
machine_guid = (uuid_t *)dim_object;
RRDHOST *host = ctx->rrdeng_ctx->host;
- fatal_assert(!uuid_compare(host->host_uuid, *machine_guid));
+ if (unlikely(uuid_compare(host->host_uuid, *machine_guid))) {
+ error("Metadata host machine GUID does not match the one assosiated with the dimension");
+ return NULL;
+ }
chart_guid = (uuid_t *)(dim_object + 16);
dim_char_guid = (uuid_t *)(dim_object + 16 + 16);
ret = find_object_by_guid(dim_char_guid, id_str, sizeof(id_str));
- fatal_assert(GUID_TYPE_CHAR == ret);
+ if (unlikely(GUID_TYPE_CHAR != ret))
+ return NULL;
ret = find_object_by_guid(chart_guid, chart_object, sizeof(chart_object));
- fatal_assert(GUID_TYPE_CHART == ret);
+ if (unlikely(GUID_TYPE_CHART != ret))
+ return NULL;
chart_char_guid = (uuid_t *)(chart_object + 16);
ret = find_object_by_guid(chart_char_guid, chart_fullid, RRD_ID_LENGTH_MAX + 1);
- fatal_assert(GUID_TYPE_CHAR == ret);
+ if (unlikely(GUID_TYPE_CHAR != ret))
+ return NULL;
RRDSET *st = rrdset_find(host, chart_fullid);
- fatal_assert(st);
+ if (!st)
+ return NULL;
RRDDIM *rd = rrddim_find(st, id_str);
diff --git a/database/engine/metadata_log/metalogpluginsd.c b/database/engine/metadata_log/metalogpluginsd.c
index da7e19377f..2c6dae49ff 100755
--- a/database/engine/metadata_log/metalogpluginsd.c
+++ b/database/engine/metadata_log/metalogpluginsd.c
@@ -116,41 +116,55 @@ PARSER_RC metalog_pluginsd_context_action(void *user, uuid_t *uuid)
ret = find_object_by_guid(uuid, object, 49);
switch (ret) {
- case GUID_TYPE_CHAR:
- fatal_assert(0);
- break;
- case GUID_TYPE_CHART:
- case GUID_TYPE_DIMENSION:
- host = ctx->rrdeng_ctx->host;
- switch (ret) {
- case GUID_TYPE_CHART:
- chart_char_guid = (uuid_t *)(object + 16);
-
- ret = find_object_by_guid(chart_char_guid, id_str, RRD_ID_LENGTH_MAX + 1);
- fatal_assert(GUID_TYPE_CHAR == ret);
- ((PARSER_USER_OBJECT *) user)->st = rrdset_find(host, id_str);
+ case GUID_TYPE_NOTFOUND:
+ if (unlikely(ctx->rrdeng_ctx->host && uuid_compare(ctx->rrdeng_ctx->host->host_uuid, *uuid)))
+ error_with_guid(uuid, "Failed to find valid context");
+ break;
+ case GUID_TYPE_CHAR:
+ error_with_guid(uuid, "Ignoring unexpected type GUID_TYPE_CHAR");
break;
+ case GUID_TYPE_CHART:
case GUID_TYPE_DIMENSION:
- chart_guid = (uuid_t *)(object + 16);
-
- ret = find_object_by_guid(chart_guid, chart_object, 33);
- fatal_assert(GUID_TYPE_CHART == ret);
- chart_char_guid = (uuid_t *)(chart_object + 16);
-
- ret = find_object_by_guid(chart_char_guid, id_str, RRD_ID_LENGTH_MAX + 1);
- fatal_assert(GUID_TYPE_CHAR == ret);
- ((PARSER_USER_OBJECT *) user)->st = rrdset_find(host, id_str);
+ host = ctx->rrdeng_ctx->host;
+ switch (ret) {
+ case GUID_TYPE_CHART:
+ chart_char_guid = (uuid_t *)(object + 16);
+
+ ret = find_object_by_guid(chart_char_guid, id_str, RRD_ID_LENGTH_MAX + 1);
+ if (unlikely(GUID_TYPE_CHAR != ret))
+ error_with_guid(uuid, "Failed to find valid chart name");
+ else
+ ((PARSER_USER_OBJECT *)user)->st = rrdset_find(host, id_str);
+ break;
+ case GUID_TYPE_DIMENSION:
+ chart_guid = (uuid_t *)(object + 16); /* in a dimension object the owning chart's GUID sits at offset 16 */
+
+ ret = find_object_by_guid(chart_guid, chart_object, 33);
+ if (unlikely(GUID_TYPE_CHART != ret)) {
+ error_with_guid(uuid, "Failed to find valid chart");
+ break;
+ }
+ chart_char_guid = (uuid_t *)(chart_object + 16); /* the chart-name GUID is read from the chart object fetched above; object + 16 is the chart GUID itself and would never resolve to GUID_TYPE_CHAR */
+
+ ret = find_object_by_guid(chart_char_guid, id_str, RRD_ID_LENGTH_MAX + 1);
+ if (unlikely(GUID_TYPE_CHAR != ret))
+ error_with_guid(uuid, "Failed to find valid chart name");
+ else
+ ((PARSER_USER_OBJECT *)user)->st = rrdset_find(host, id_str);
+ break;
+ default:
+ break;
+ }
+ break;
+ case GUID_TYPE_HOST:
+ /* Ignore for now */
+ break;
+ case GUID_TYPE_NOSPACE:
+ error_with_guid(uuid, "Not enough space for object retrieval");
break;
default:
- fatal_assert(0);
+ error("Unknown return code %u from find_object_by_guid", ret);
break;
- }
- break;
- case GUID_TYPE_HOST:
- /* Ignore for now */
- break;
- default:
- break;
}
return PARSER_RC_OK;
@@ -176,6 +190,8 @@ PARSER_RC metalog_pluginsd_tombstone_action(void *user, uuid_t *uuid)
rrdhost_wrlock(host);
rrdset_free(st);
rrdhost_unlock(host);
+ } else {
+ debug(D_METADATALOG, "Ignoring nonexistent chart metadata record.");
}
break;
case GUID_TYPE_DIMENSION:
@@ -186,6 +202,9 @@ PARSER_RC metalog_pluginsd_tombstone_action(void *user, uuid_t *uuid)
rrddim_free_custom(st, rd, 0);
rrdset_unlock(st);
}
+ else {
+ debug(D_METADATALOG, "Ignoring nonexistent dimension metadata record.");
+ }
break;
case GUID_TYPE_HOST:
/* Ignore for now */
diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c
index daa96376ed..fc3bb90e2c 100755
--- a/database/engine/rrdengineapi.c
+++ b/database/engine/rrdengineapi.c
@@ -74,10 +74,14 @@ void rrdeng_metric_init(RRDDIM *rd, uuid_t *dim_uuid)
if (unlikely(find_or_generate_guid(rd, rd->state->metric_uuid, GUID_TYPE_DIMENSION,
replace_instead_of_generate))) {
errno = 0;
- error("FAILED to generate GUID for %s", rd->id);
- freez(rd->state->metric_uuid);
- rd->state->metric_uuid = NULL;
- fatal_assert(0);
+ error("FAILED to reuse GUID for %s", rd->id);
+ if (unlikely(find_or_generate_guid(rd, rd->state->metric_uuid, GUID_TYPE_DIMENSION, 0))) {
+ errno = 0;
+ error("FAILED to generate GUID for %s", rd->id);
+ freez(rd->state->metric_uuid);
+ rd->state->metric_uuid = NULL;
+ fatal_assert(0);
+ }
}
uv_rwlock_rdlock(&pg_cache->metrics_index.lock);