summary | refs | log | tree | commit | diff | stats
path: root/database/engine
diff options
context:
space:
mode:
author: Markos Fountoulakis <44345837+mfundul@users.noreply.github.com> 2019-09-24 11:59:15 +0300
committer: GitHub <noreply@github.com> 2019-09-24 11:59:15 +0300
commit: 2728be8b0614fd58d4b530a32fee164509e88fd8 (patch)
tree: abd39375b249b832327e35880597f9974be81b22 /database/engine
parent: 7977137cee3274d2535fbdb49ec2d68941285b82 (diff)
Detect deadlock in dbengine page cache (#6911)
* Detect deadlock in dbengine page cache when there are too many metrics and print error message
* Resolve dbengine deadlock by dropping metrics when page cache is too small and define relevant alarms
* Changed printing deadlock errors to only happen once per dbengine instance
Diffstat (limited to 'database/engine')
-rw-r--r-- database/engine/rrdengine.c 2
-rw-r--r-- database/engine/rrdengine.h 13
-rw-r--r-- database/engine/rrdengineapi.c 30
-rw-r--r-- database/engine/rrdengineapi.h 4
4 files changed, 41 insertions, 8 deletions
diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c
index 36d917541d..42f59b59f3 100644
--- a/database/engine/rrdengine.c
+++ b/database/engine/rrdengine.c
@@ -5,6 +5,8 @@
rrdeng_stats_t global_io_errors = 0;
rrdeng_stats_t global_fs_errors = 0;
+rrdeng_stats_t global_pg_cache_warnings = 0;
+rrdeng_stats_t global_pg_cache_errors = 0;
rrdeng_stats_t rrdeng_reserved_file_descriptors = 0;
void sanity_check(void)
diff --git a/database/engine/rrdengine.h b/database/engine/rrdengine.h
index 6f6a6f8ffd..6447a685bd 100644
--- a/database/engine/rrdengine.h
+++ b/database/engine/rrdengine.h
@@ -148,12 +148,25 @@ struct rrdengine_statistics {
rrdeng_stats_t page_cache_descriptors;
rrdeng_stats_t io_errors;
rrdeng_stats_t fs_errors;
+ rrdeng_stats_t pg_cache_warnings;
+ rrdeng_stats_t pg_cache_errors;
};
/* I/O errors global counter */
extern rrdeng_stats_t global_io_errors;
/* File-System errors global counter */
extern rrdeng_stats_t global_fs_errors;
+/*
+ * Page cache warnings global counter.
+ * Some page cache instance is near critical utilization where metrics will fail to be stored.
+ */
+extern rrdeng_stats_t global_pg_cache_warnings;
+/*
+ * Page cache errors global counter.
+ * Some page cache instance has hit critical utilization where metrics failed to be stored as a deadlock resolution
+ * measure.
+ */
+extern rrdeng_stats_t global_pg_cache_errors;
/* number of File-Descriptors that have been reserved by dbengine */
extern rrdeng_stats_t rrdeng_reserved_file_descriptors;
diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c
index bf373f31c7..26fbe8f7ef 100644
--- a/database/engine/rrdengineapi.c
+++ b/database/engine/rrdengineapi.c
@@ -95,9 +95,8 @@ void rrdeng_store_metric_flush_current_page(RRDDIM *rd)
if (likely(descr->page_length)) {
int ret, page_is_empty;
-#ifdef NETDATA_INTERNAL_CHECKS
rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);
-#endif
+
if (handle->prev_descr) {
/* unpin old second page */
pg_cache_put(ctx, handle->prev_descr);
@@ -192,9 +191,26 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number n
if (unlikely(INVALID_TIME == descr->start_time)) {
descr->start_time = point_in_time;
-#ifdef NETDATA_INTERNAL_CHECKS
rrd_stat_atomic_add(&ctx->stats.metric_API_producers, 1);
-#endif
+
+ if (unlikely(((unsigned long)ctx->stats.metric_API_producers) >= ctx->max_cache_pages)) {
+ if (0 == (unsigned long)ctx->stats.pg_cache_errors) {
+ /* only print the first time */
+ error("Deadlock detected in dbengine instance \"%s\", metric data will not be stored in the database"
+ ", please increase page cache size.", ctx->dbfiles_path);
+ }
+ rrd_stat_atomic_add(&ctx->stats.pg_cache_errors, 1);
+ rrd_stat_atomic_add(&global_pg_cache_errors, 1);
+ /* Resolve deadlock */
+ descr->page_length = 0; /* make sure the page descriptor is deconstructed */
+ rrdeng_store_metric_flush_current_page(rd);
+ rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1);
+ return;
+ } else if (unlikely(((unsigned long)ctx->stats.metric_API_producers) >= ctx->cache_pages_low_watermark)) {
+ rrd_stat_atomic_add(&ctx->stats.pg_cache_warnings, 1);
+ rrd_stat_atomic_add(&global_pg_cache_warnings, 1);
+ }
+
pg_cache_insert(ctx, handle->page_index, descr);
} else {
pg_cache_add_new_metric_time(handle->page_index, descr);
@@ -672,7 +688,7 @@ void *rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_i
* You must not change the indices of the statistics or user code will break.
* You must not exceed RRDENG_NR_STATS or it will crash.
*/
-void rrdeng_get_33_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
+void rrdeng_get_35_statistics(struct rrdengine_instance *ctx, unsigned long long *array)
{
struct page_cache *pg_cache = &ctx->pg_cache;
@@ -709,7 +725,9 @@ void rrdeng_get_33_statistics(struct rrdengine_instance *ctx, unsigned long long
array[30] = (uint64_t)global_io_errors;
array[31] = (uint64_t)global_fs_errors;
array[32] = (uint64_t)rrdeng_reserved_file_descriptors;
- assert(RRDENG_NR_STATS == 33);
+ array[33] = (uint64_t)global_pg_cache_warnings;
+ array[34] = (uint64_t)global_pg_cache_errors;
+ assert(RRDENG_NR_STATS == 35);
}
/* Releases reference to page */
diff --git a/database/engine/rrdengineapi.h b/database/engine/rrdengineapi.h
index 9b1ab18742..97e358199e 100644
--- a/database/engine/rrdengineapi.h
+++ b/database/engine/rrdengineapi.h
@@ -8,7 +8,7 @@
#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (32)
#define RRDENG_MIN_DISK_SPACE_MB (256)
-#define RRDENG_NR_STATS (33)
+#define RRDENG_NR_STATS (35)
#define RRDENG_FD_BUDGET_PER_INSTANCE (50)
@@ -41,7 +41,7 @@ extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_han
extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle);
extern time_t rrdeng_metric_latest_time(RRDDIM *rd);
extern time_t rrdeng_metric_oldest_time(RRDDIM *rd);
-extern void rrdeng_get_33_statistics(struct rrdengine_instance *ctx, unsigned long long *array);
+extern void rrdeng_get_35_statistics(struct rrdengine_instance *ctx, unsigned long long *array);
/* must call once before using anything */
extern int rrdeng_init(struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb,