From fe45b4961e92b21d0de2a3e340f4563fb6b4be2b Mon Sep 17 00:00:00 2001 From: Andrew Moss <1043609+amoss@users.noreply.github.com> Date: Tue, 17 Dec 2019 19:15:17 +0100 Subject: Revert "Fix race condition in dbengine (#7533)" (#7560) We are removing this fix for further internal testing, it will be returning after we iron out some bugs. This reverts commit 53ab093d84919c743450199a31bca9a13412e451. --- database/engine/pagecache.c | 34 +++++++++++++--------------------- database/engine/pagecache.h | 1 - database/engine/rrdengine.c | 10 +++++----- database/engine/rrdengineapi.c | 10 +++++----- database/engine/rrdenglocking.c | 36 +++++++++++++++++------------------- 5 files changed, 40 insertions(+), 51 deletions(-) (limited to 'database') diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index ef8c5c93ba..7ac4512d69 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -101,13 +101,6 @@ void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr) uv_cond_broadcast(&pg_cache_descr->cond); } -void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) -{ - rrdeng_page_descr_mutex_lock(ctx, descr); - pg_cache_wake_up_waiters_unsafe(descr); - rrdeng_page_descr_mutex_unlock(ctx, descr); -} - /* * The caller must hold page descriptor lock. * The lock will be released and re-acquired. The descriptor is not guaranteed @@ -142,8 +135,10 @@ unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_ /* * The caller must hold page descriptor lock. + * Gets a reference to the page descriptor. + * Returns 1 on success and 0 on failure. */ -int pg_cache_can_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) +int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) { struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; @@ -151,25 +146,25 @@ int pg_cache_can_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_acces (exclusive_access && pg_cache_descr->refcnt)) { return 0; } + if (exclusive_access) + pg_cache_descr->flags |= RRD_PAGE_LOCKED; + ++pg_cache_descr->refcnt; return 1; } /* * The caller must hold page descriptor lock. - * Gets a reference to the page descriptor. - * Returns 1 on success and 0 on failure. + * Same return values as pg_cache_try_get_unsafe() without doing anything. */ -int pg_cache_try_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) +int pg_cache_can_get_unsafe(struct rrdeng_page_descr *descr, int exclusive_access) { struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr; - if (!pg_cache_can_get_unsafe(descr, exclusive_access)) + if ((pg_cache_descr->flags & (RRD_PAGE_LOCKED | RRD_PAGE_READ_PENDING)) || + (exclusive_access && pg_cache_descr->refcnt)) { return 0; - - if (exclusive_access) - pg_cache_descr->flags |= RRD_PAGE_LOCKED; - ++pg_cache_descr->refcnt; + } return 1; } @@ -434,11 +429,8 @@ void pg_cache_punch_hole(struct rrdengine_instance *ctx, struct rrdeng_page_desc uv_rwlock_wrunlock(&pg_cache->pg_cache_rwlock); } pg_cache_put(ctx, descr); - rrdeng_try_deallocate_pg_cache_descr(ctx, descr); - while (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) { - rrdeng_try_deallocate_pg_cache_descr(ctx, descr); /* spin */ - (void)sleep_usec(1000); /* 1 msec */ - } + if (descr->pg_cache_descr_state & PG_CACHE_DESCR_ALLOCATED) + rrdeng_try_deallocate_pg_cache_descr(ctx, descr); destroy: freez(descr); pg_cache_update_metric_times(page_index); diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h index 9fd9991b5e..7d8fa2a1df 100644 --- a/database/engine/pagecache.h +++ b/database/engine/pagecache.h @@ -148,7 +148,6 @@ struct page_cache { /* TODO: add statistics */ }; extern void pg_cache_wake_up_waiters_unsafe(struct rrdeng_page_descr *descr); -extern void pg_cache_wake_up_waiters(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); extern void pg_cache_wait_event_unsafe(struct rrdeng_page_descr *descr); extern unsigned long pg_cache_wait_event(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr); extern void pg_cache_replaceQ_insert(struct rrdengine_instance *ctx, diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c index 3d49379048..9615665f85 100644 --- a/database/engine/rrdengine.c +++ b/database/engine/rrdengine.c @@ -121,19 +121,19 @@ after_crc_check: } else { (void) memcpy(page, uncompressed_buf + page_offset, descr->page_length); } + pg_cache_replaceQ_insert(ctx, descr); rrdeng_page_descr_mutex_lock(ctx, descr); pg_cache_descr = descr->pg_cache_descr; pg_cache_descr->page = page; pg_cache_descr->flags |= RRD_PAGE_POPULATED; pg_cache_descr->flags &= ~RRD_PAGE_READ_PENDING; - rrdeng_page_descr_mutex_unlock(ctx, descr); - pg_cache_replaceQ_insert(ctx, descr); + debug(D_RRDENGINE, "%s: Waking up waiters.", __func__); if (xt_io_descr->release_descr) { - pg_cache_put(ctx, descr); + pg_cache_put_unsafe(descr); } else { - debug(D_RRDENGINE, "%s: Waking up waiters.", __func__); - pg_cache_wake_up_waiters(ctx, descr); + pg_cache_wake_up_waiters_unsafe(descr); } + rrdeng_page_descr_mutex_unlock(ctx, descr); } if (!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm) { freez(uncompressed_buf); diff --git a/database/engine/rrdengineapi.c b/database/engine/rrdengineapi.c index c3f77b98a8..f824728798 100755 --- a/database/engine/rrdengineapi.c +++ b/database/engine/rrdengineapi.c @@ -473,16 +473,16 @@ storage_number rrdeng_load_metric_next(struct rrddim_query_handle *rrdimm_handle /* We need to get a new page */ if (descr) { /* Drop old page's reference */ -#ifdef NETDATA_INTERNAL_CHECKS - rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1); -#endif - pg_cache_put(ctx, descr); - handle->descr = NULL; handle->next_page_time = (page_end_time / USEC_PER_SEC) + 1; if (unlikely(handle->next_page_time > rrdimm_handle->end_time)) { goto no_more_metrics; } next_page_time = handle->next_page_time * USEC_PER_SEC; +#ifdef NETDATA_INTERNAL_CHECKS + rrd_stat_atomic_add(&ctx->stats.metric_API_consumers, -1); +#endif + pg_cache_put(ctx, descr); + handle->descr = NULL; } descr = pg_cache_lookup_next(ctx, handle->page_index, &handle->page_index->id, next_page_time, rrdimm_handle->end_time * USEC_PER_SEC); diff --git a/database/engine/rrdenglocking.c b/database/engine/rrdenglocking.c index 0c1d26f25a..754d802a23 100644 --- a/database/engine/rrdenglocking.c +++ b/database/engine/rrdenglocking.c @@ -20,7 +20,12 @@ struct page_cache_descr *rrdeng_create_pg_cache_descr(struct rrdengine_instance void rrdeng_destroy_pg_cache_descr(struct rrdengine_instance *ctx, struct page_cache_descr *pg_cache_descr) { + /* Flush any lock and condition variable users */ + uv_mutex_lock(&pg_cache_descr->mutex); + uv_mutex_unlock(&pg_cache_descr->mutex); + uv_cond_destroy(&pg_cache_descr->cond); + uv_mutex_destroy(&pg_cache_descr->mutex); freez(pg_cache_descr); rrd_stat_atomic_add(&ctx->stats.page_cache_descriptors, -1); @@ -95,7 +100,7 @@ void rrdeng_page_descr_mutex_lock(struct rrdengine_instance *ctx, struct rrdeng_ void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) { unsigned long old_state, new_state, ret_state, old_users; - struct page_cache_descr *pg_cache_descr, *delete_pg_cache_descr = NULL; + struct page_cache_descr *pg_cache_descr; uint8_t we_locked; uv_mutex_unlock(&descr->pg_cache_descr->mutex); @@ -111,8 +116,7 @@ void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrden ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, 0); if (old_state == ret_state) { /* success */ - rrdeng_destroy_pg_cache_descr(ctx, delete_pg_cache_descr); - return; + break; } continue; /* spin */ } @@ -124,15 +128,12 @@ void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrden pg_cache_descr = descr->pg_cache_descr; /* caller is the only page cache descriptor user and there are no pending references on the page */ if ((old_state & PG_CACHE_DESCR_DESTROY) && (1 == old_users) && - !pg_cache_descr->flags && !pg_cache_descr->refcnt) { - assert(!pg_cache_descr->waiters); - + !pg_cache_descr->flags && !pg_cache_descr->refcnt && !pg_cache_descr->waiters) { new_state = PG_CACHE_DESCR_LOCKED; ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); if (old_state == ret_state) { we_locked = 1; - delete_pg_cache_descr = pg_cache_descr; - descr->pg_cache_descr = NULL; + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); /* retry */ continue; } @@ -149,6 +150,7 @@ void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrden } /* spin */ } + } /* @@ -159,11 +161,11 @@ void rrdeng_page_descr_mutex_unlock(struct rrdengine_instance *ctx, struct rrden void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct rrdeng_page_descr *descr) { unsigned long old_state, new_state, ret_state, old_users; - struct page_cache_descr *pg_cache_descr = NULL; - uint8_t just_locked, can_free, must_unlock; + struct page_cache_descr *pg_cache_descr; + uint8_t just_locked, we_freed, must_unlock; just_locked = 0; - can_free = 0; + we_freed = 0; must_unlock = 0; while (1) { /* spin */ old_state = descr->pg_cache_descr_state; @@ -175,11 +177,9 @@ void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct must_unlock = 1; just_locked = 0; /* Try deallocate if there are no pending references on the page */ - if (!pg_cache_descr->flags && !pg_cache_descr->refcnt) { - assert(!pg_cache_descr->waiters); - - descr->pg_cache_descr = NULL; - can_free = 1; + if (!pg_cache_descr->flags && !pg_cache_descr->refcnt && !pg_cache_descr->waiters) { + rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); + we_freed = 1; /* success */ continue; } @@ -188,7 +188,7 @@ void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct if (unlikely(must_unlock)) { assert(0 == old_users); - if (can_free) { + if (we_freed) { /* success */ new_state = 0; } else { @@ -198,8 +198,6 @@ void rrdeng_try_deallocate_pg_cache_descr(struct rrdengine_instance *ctx, struct ret_state = ulong_compare_and_swap(&descr->pg_cache_descr_state, old_state, new_state); if (old_state == ret_state) { /* unlocked */ - if (can_free) - rrdeng_destroy_pg_cache_descr(ctx, pg_cache_descr); return; } continue; /* spin */ -- cgit v1.2.3