diff options
Diffstat (limited to 'database/engine')
-rw-r--r-- | database/engine/README.md | 6 |
-rw-r--r-- | database/engine/pagecache.c | 36 |
-rw-r--r-- | database/engine/rrdengine.h | 13 |
-rw-r--r-- | database/engine/rrdengineapi.c | 27 |
-rw-r--r-- | database/engine/rrdengineapi.h | 6 |
5 files changed, 39 insertions, 49 deletions
diff --git a/database/engine/README.md b/database/engine/README.md index e824aa3a27..78f3b15ec8 100644 --- a/database/engine/README.md +++ b/database/engine/README.md @@ -57,7 +57,8 @@ The above values are the default and minimum values for Page Cache size and DB e in **MiB**. All DB engine instances will allocate the configured resources separately. The `page cache size` option determines the amount of RAM in **MiB** that is dedicated to caching Netdata metric values -themselves. +themselves as far as queries are concerned. The total page cache size will be greater since data collection itself will +consume additional memory as is described in the [Memory requirements](#memory-requirements) section. The `dbengine disk space` option determines the amount of disk space in **MiB** that is dedicated to storing Netdata metric values and all related metadata describing them. @@ -88,7 +89,8 @@ available memory. There are explicit memory requirements **per** DB engine **instance**, meaning **per** Netdata **node** (e.g. localhost and streaming recipient nodes): -- `page cache size` must be at least `#dimensions-being-collected x 4096 x 2` bytes. +- The total page cache memory footprint will be an additional `#dimensions-being-collected x 4096 x 2` bytes over what + the user configured with `page cache size`. - an additional `#pages-on-disk x 4096 x 0.03` bytes of RAM are allocated for metadata. diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index 457bcb2185..a419ba9818 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -209,9 +209,31 @@ static void pg_cache_release_pages(struct rrdengine_instance *ctx, unsigned numb pg_cache_release_pages_unsafe(ctx, number); uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); } + +/* + * This function returns the maximum number of pages allowed in the page cache. + * The caller must hold the page cache lock. 
+ */ +static inline unsigned long pg_cache_hard_limit(struct rrdengine_instance *ctx) +{ + /* it's twice the number of producers since we pin 2 pages per producer */ + return ctx->max_cache_pages + 2 * (unsigned long)ctx->stats.metric_API_producers; +} + +/* + * This function returns the low watermark number of pages in the page cache. The page cache should strive to keep the + * number of pages below that number. + * The caller must hold the page cache lock. + */ +static inline unsigned long pg_cache_soft_limit(struct rrdengine_instance *ctx) +{ + /* it's twice the number of producers since we pin 2 pages per producer */ + return ctx->cache_pages_low_watermark + 2 * (unsigned long)ctx->stats.metric_API_producers; +} + /* * This function will block until it reserves #number populated pages. - * It will trigger evictions or dirty page flushing if the ctx->max_cache_pages limit is hit. + * It will trigger evictions or dirty page flushing if the pg_cache_hard_limit() limit is hit. */ static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned number) { @@ -223,10 +245,10 @@ static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned numb assert(number < ctx->max_cache_pages); uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); - if (pg_cache->populated_pages + number >= ctx->max_cache_pages + 1) + if (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1) debug(D_RRDENGINE, "==Page cache full. Reserving %u pages.==", number); - while (pg_cache->populated_pages + number >= ctx->max_cache_pages + 1) { + while (pg_cache->populated_pages + number >= pg_cache_hard_limit(ctx) + 1) { if (!pg_cache_try_evict_one_page_unsafe(ctx)) { /* failed to evict */ @@ -260,7 +282,7 @@ static void pg_cache_reserve_pages(struct rrdengine_instance *ctx, unsigned numb /* * This function will attempt to reserve #number populated pages. - * It may trigger evictions if the ctx->cache_pages_low_watermark limit is hit. 
+ * It may trigger evictions if the pg_cache_soft_limit() limit is hit. * Returns 0 on failure and 1 on success. */ static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned number) @@ -272,7 +294,7 @@ static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned n assert(number < ctx->max_cache_pages); uv_rwlock_wrlock(&pg_cache->pg_cache_rwlock); - if (pg_cache->populated_pages + number >= ctx->cache_pages_low_watermark + 1) { + if (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1) { debug(D_RRDENGINE, "==Page cache full. Trying to reserve %u pages.==", number); @@ -280,11 +302,11 @@ static int pg_cache_try_reserve_pages(struct rrdengine_instance *ctx, unsigned n if (!pg_cache_try_evict_one_page_unsafe(ctx)) break; ++count; - } while (pg_cache->populated_pages + number >= ctx->cache_pages_low_watermark + 1); + } while (pg_cache->populated_pages + number >= pg_cache_soft_limit(ctx) + 1); debug(D_RRDENGINE, "Evicted %u pages.", count); } - if (pg_cache->populated_pages + number < ctx->max_cache_pages + 1) { + if (pg_cache->populated_pages + number < pg_cache_hard_limit(ctx) + 1) { pg_cache->populated_pages += number; ret = 1; /* success */ } diff --git a/database/engine/rrdengine.h b/database/engine/rrdengine.h index 6447a685bd..6f6a6f8ffd 100644 --- a/database/engine/rrdengine.h +++ b/database/engine/rrdengine.h @@ -148,25 +148,12 @@ struct rrdengine_statistics { rrdeng_stats_t page_cache_descriptors; rrdeng_stats_t io_errors; rrdeng_stats_t fs_errors; - rrdeng_stats_t pg_cache_warnings; - rrdeng_stats_t pg_cache_errors; }; /* I/O errors global counter */ extern rrdeng_stats_t global_io_errors; /* File-System errors global counter */ extern rrdeng_stats_t global_fs_errors; -/* - * Page cache warnings global counter. - * Some page cache instance is near critical utilization where metrics will fail to be stored. - */ -extern rrdeng_stats_t global_pg_cache_warnings; -/* - * Page cache errors global counter. 
- * Some page cache instance has hit critical utilization where metrics failed to be stored as a deadlock resolution - * measure. - */ -extern rrdeng_stats_t global_pg_cache_errors; /* number of File-Descriptors that have been reserved by dbengine */ extern rrdeng_stats_t rrdeng_reserved_file_descriptors; diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c index 7f54439254..79fc02e93e 100644 --- a/database/engine/rrdengineapi.c +++ b/database/engine/rrdengineapi.c @@ -4,7 +4,7 @@ /* Default global database instance */ static struct rrdengine_instance default_global_ctx; -int default_rrdeng_page_cache_mb = 128; +int default_rrdeng_page_cache_mb = 32; int default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB; /* @@ -192,25 +192,6 @@ void rrdeng_store_metric_next(RRDDIM *rd, usec_t point_in_time, storage_number n descr->start_time = point_in_time; rrd_stat_atomic_add(&ctx->stats.metric_API_producers, 1); - - if (unlikely(((unsigned long)ctx->stats.metric_API_producers) >= ctx->max_cache_pages)) { - if (0 == (unsigned long)ctx->stats.pg_cache_errors) { - /* only print the first time */ - error("Deadlock detected in dbengine instance \"%s\", metric data will not be stored in the database" - ", please increase page cache size.", ctx->dbfiles_path); - } - rrd_stat_atomic_add(&ctx->stats.pg_cache_errors, 1); - rrd_stat_atomic_add(&global_pg_cache_errors, 1); - /* Resolve deadlock */ - descr->page_length = 0; /* make sure the page descriptor is deconstructed */ - rrdeng_store_metric_flush_current_page(rd); - rrd_stat_atomic_add(&ctx->stats.metric_API_producers, -1); - return; - } else if (unlikely(((unsigned long)ctx->stats.metric_API_producers) >= ctx->cache_pages_low_watermark)) { - rrd_stat_atomic_add(&ctx->stats.pg_cache_warnings, 1); - rrd_stat_atomic_add(&global_pg_cache_warnings, 1); - } - pg_cache_insert(ctx, handle->page_index, descr); } else { pg_cache_add_new_metric_time(handle->page_index, descr); @@ -692,7 +673,7 @@ void 
*rrdeng_get_page(struct rrdengine_instance *ctx, uuid_t *id, usec_t point_i * You must not change the indices of the statistics or user code will break. * You must not exceed RRDENG_NR_STATS or it will crash. */ -void rrdeng_get_35_statistics(struct rrdengine_instance *ctx, unsigned long long *array) +void rrdeng_get_33_statistics(struct rrdengine_instance *ctx, unsigned long long *array) { struct page_cache *pg_cache = &ctx->pg_cache; @@ -729,9 +710,7 @@ void rrdeng_get_35_statistics(struct rrdengine_instance *ctx, unsigned long long array[30] = (uint64_t)global_io_errors; array[31] = (uint64_t)global_fs_errors; array[32] = (uint64_t)rrdeng_reserved_file_descriptors; - array[33] = (uint64_t)global_pg_cache_warnings; - array[34] = (uint64_t)global_pg_cache_errors; - assert(RRDENG_NR_STATS == 35); + assert(RRDENG_NR_STATS == 33); } /* Releases reference to page */ diff --git a/database/engine/rrdengineapi.h b/database/engine/rrdengineapi.h index 97e358199e..c876705e4f 100644 --- a/database/engine/rrdengineapi.h +++ b/database/engine/rrdengineapi.h @@ -5,10 +5,10 @@ #include "rrdengine.h" -#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (32) +#define RRDENG_MIN_PAGE_CACHE_SIZE_MB (8) #define RRDENG_MIN_DISK_SPACE_MB (256) -#define RRDENG_NR_STATS (35) +#define RRDENG_NR_STATS (33) #define RRDENG_FD_BUDGET_PER_INSTANCE (50) @@ -41,7 +41,7 @@ extern int rrdeng_load_metric_is_finished(struct rrddim_query_handle *rrdimm_han extern void rrdeng_load_metric_finalize(struct rrddim_query_handle *rrdimm_handle); extern time_t rrdeng_metric_latest_time(RRDDIM *rd); extern time_t rrdeng_metric_oldest_time(RRDDIM *rd); -extern void rrdeng_get_35_statistics(struct rrdengine_instance *ctx, unsigned long long *array); +extern void rrdeng_get_33_statistics(struct rrdengine_instance *ctx, unsigned long long *array); /* must call once before using anything */ extern int rrdeng_init(struct rrdengine_instance **ctxp, char *dbfiles_path, unsigned page_cache_mb, |