diff options
-rw-r--r-- | aclk/aclk.c | 2 | ||||
-rw-r--r-- | daemon/global_statistics.c | 69 | ||||
-rw-r--r-- | database/engine/cache.c | 2 | ||||
-rw-r--r-- | database/engine/datafile.h | 16 | ||||
-rw-r--r-- | database/engine/journalfile.c | 10 | ||||
-rw-r--r-- | database/engine/metric.c | 8 | ||||
-rw-r--r-- | database/engine/pagecache.c | 3 | ||||
-rw-r--r-- | database/engine/pdc.c | 189 | ||||
-rw-r--r-- | database/engine/pdc.h | 10 | ||||
-rw-r--r-- | database/engine/rrdengine.c | 177 | ||||
-rw-r--r-- | database/engine/rrdengine.h | 17 | ||||
-rwxr-xr-x | database/engine/rrdengineapi.c | 3 | ||||
-rw-r--r-- | database/engine/rrdengineapi.h | 3 | ||||
-rw-r--r-- | database/ram/rrddim_mem.c | 2 | ||||
-rw-r--r-- | database/rrdhost.c | 2 | ||||
-rw-r--r-- | database/sqlite/sqlite_metadata.c | 2 | ||||
-rw-r--r-- | libnetdata/july/july.c | 20 | ||||
-rw-r--r-- | libnetdata/july/july.h | 2 | ||||
-rw-r--r-- | ml/Dimension.cc | 2 | ||||
-rw-r--r-- | ml/Host.cc | 17 | ||||
-rw-r--r-- | streaming/compression.c | 2 | ||||
-rw-r--r-- | streaming/rrdpush.c | 2 | ||||
-rw-r--r-- | web/api/queries/query.c | 49 | ||||
-rw-r--r-- | web/api/web_api_v1.c | 2 |
24 files changed, 412 insertions, 199 deletions
diff --git a/aclk/aclk.c b/aclk/aclk.c index 7e5b1f8f8b..9579912521 100644 --- a/aclk/aclk.c +++ b/aclk/aclk.c @@ -459,7 +459,7 @@ static int aclk_block_till_recon_allowed() { next_connection_attempt = now_realtime_sec() + (recon_delay / MSEC_PER_SEC); last_backoff_value = (float)recon_delay / MSEC_PER_SEC; - info("Wait before attempting to reconnect in %.3f seconds\n", recon_delay / (float)MSEC_PER_SEC); + info("Wait before attempting to reconnect in %.3f seconds", recon_delay / (float)MSEC_PER_SEC); // we want to wake up from time to time to check netdata_exit while (recon_delay) { diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 40e8795b82..b519e70511 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -1023,6 +1023,15 @@ struct dbengine2_cache_pointers { RRDDIM *rd_pgc_memory_evicting; RRDDIM *rd_pgc_memory_flushing; + RRDSET *st_pgc_tm; + RRDDIM *rd_pgc_tm_current; + RRDDIM *rd_pgc_tm_wanted; + RRDDIM *rd_pgc_tm_hot_max; + RRDDIM *rd_pgc_tm_dirty_max; + RRDDIM *rd_pgc_tm_hot; + RRDDIM *rd_pgc_tm_dirty; + RRDDIM *rd_pgc_tm_referenced; + RRDSET *st_pgc_pages; RRDDIM *rd_pgc_pages_clean; RRDDIM *rd_pgc_pages_hot; @@ -1168,7 +1177,6 @@ static void dbengine2_cache_statistics_charts(struct dbengine2_cache_pointers *p } { - if (unlikely(!ptrs->st_pgc_memory)) { BUFFER *id = buffer_create(100); buffer_sprintf(id, "dbengine_%s_cache_memory", name); @@ -1223,6 +1231,56 @@ static void dbengine2_cache_statistics_charts(struct dbengine2_cache_pointers *p } { + if (unlikely(!ptrs->st_pgc_tm)) { + BUFFER *id = buffer_create(100); + buffer_sprintf(id, "dbengine_%s_target_memory", name); + + BUFFER *family = buffer_create(100); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100); + buffer_sprintf(title, "Netdata %s Target Cache Memory", name); + + ptrs->st_pgc_tm = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_tm_current = rrddim_add(ptrs->st_pgc_tm, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_wanted = rrddim_add(ptrs->st_pgc_tm, "wanted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_referenced = rrddim_add(ptrs->st_pgc_tm, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot_max = rrddim_add(ptrs->st_pgc_tm, "hot max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty_max = rrddim_add(ptrs->st_pgc_tm, "dirty max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot = rrddim_add(ptrs->st_pgc_tm, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty = rrddim_add(ptrs->st_pgc_tm, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_current, (collected_number)pgc_stats->current_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_wanted, (collected_number)pgc_stats->wanted_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_referenced, (collected_number)pgc_stats->referenced_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot_max, (collected_number)pgc_stats->queues.hot.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty_max, (collected_number)pgc_stats->queues.dirty.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot, (collected_number)pgc_stats->queues.hot.size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty, (collected_number)pgc_stats->queues.dirty.size); + + rrdset_done(ptrs->st_pgc_tm); + } + + { if (unlikely(!ptrs->st_pgc_pages)) { BUFFER *id = buffer_create(100); buffer_sprintf(id, "dbengine_%s_cache_pages", name); @@ -1972,6 +2030,7 @@ static void dbengine2_statistics_charts(void) { static RRDDIM *rd_unavailable = NULL; static RRDDIM *rd_unroutable = NULL; static RRDDIM *rd_not_found = NULL; + static RRDDIM *rd_cancelled = NULL; static RRDDIM *rd_invalid_extent = NULL; static RRDDIM *rd_extent_merged = NULL; @@ -1996,9 +2055,10 @@ static void dbengine2_statistics_charts(void) { rd_mmap_failed = rrddim_add(st_query_pages_from_disk, "fail cant mmap", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_unavailable = rrddim_add(st_query_pages_from_disk, "fail unavailable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_unroutable = rrddim_add(st_query_pages_from_disk, "fail unroutable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_not_found = rrddim_add(st_query_pages_from_disk, "fail uuid not found", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_not_found = rrddim_add(st_query_pages_from_disk, "fail not found", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_invalid_extent = rrddim_add(st_query_pages_from_disk, "fail invalid extent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_extent_merged = rrddim_add(st_query_pages_from_disk, "extent merged", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cancelled = rrddim_add(st_query_pages_from_disk, "cancelled", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); } priority++; @@ -2008,7 +2068,8 @@ static void dbengine2_statistics_charts(void) { rrddim_set_by_pointer(st_query_pages_from_disk, rd_mmap_failed, (collected_number)cache_efficiency_stats.pages_load_fail_cant_mmap_extent); rrddim_set_by_pointer(st_query_pages_from_disk, rd_unavailable, (collected_number)cache_efficiency_stats.pages_load_fail_datafile_not_available); rrddim_set_by_pointer(st_query_pages_from_disk, rd_unroutable, (collected_number)cache_efficiency_stats.pages_load_fail_unroutable); - rrddim_set_by_pointer(st_query_pages_from_disk, rd_not_found, (collected_number)cache_efficiency_stats.pages_load_fail_uuid_not_found); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_not_found, (collected_number)cache_efficiency_stats.pages_load_fail_not_found); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_cancelled, (collected_number)cache_efficiency_stats.pages_load_fail_cancelled); rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid_extent, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_extent); rrddim_set_by_pointer(st_query_pages_from_disk, rd_extent_merged, (collected_number)cache_efficiency_stats.pages_load_extent_merged); @@ -3778,10 +3839,12 @@ void *global_statistics_main(void *ptr) worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); +#ifdef ENABLE_DBENGINE if(dbengine_enabled) { worker_is_busy(WORKER_JOB_DBENGINE); dbengine2_statistics_charts(); } +#endif worker_is_busy(WORKER_JOB_HEARTBEAT); update_heartbeat_charts(); diff --git a/database/engine/cache.c b/database/engine/cache.c index dfa707c9e1..e059fd235c 100644 --- a/database/engine/cache.c +++ b/database/engine/cache.c @@ -310,7 +310,7 @@ static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); - per1000 = current_cache_size * 1000 / wanted_cache_size; + per1000 = (size_t)((unsigned long long)current_cache_size * 1000UL / (unsigned long long)wanted_cache_size); __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED); __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED); diff --git a/database/engine/datafile.h b/database/engine/datafile.h index 373c0281d2..62b7754fb7 100644 --- a/database/engine/datafile.h +++ b/database/engine/datafile.h @@ -24,7 +24,14 @@ struct rrdengine_instance; #define MAX_DATAFILES (65536) /* Supports up to 64TiB for now */ #define TARGET_DATAFILES (50) -#define DATAFILE_IDEAL_IO_SIZE (1048576U) +typedef enum __attribute__ ((__packed__)) { + DATAFILE_ACQUIRE_OPEN_CACHE = 0, + DATAFILE_ACQUIRE_PAGE_DETAILS, + DATAFILE_ACQUIRE_RETENTION, + + // terminator + DATAFILE_ACQUIRE_MAX, +} DATAFILE_ACQUIRE_REASONS; /* only one event loop is supported for now */ struct rrdengine_datafile { @@ -47,7 +54,7 @@ struct rrdengine_datafile { struct { SPINLOCK spinlock; unsigned lockers; - unsigned lockers_by_reason[2]; + unsigned lockers_by_reason[DATAFILE_ACQUIRE_MAX]; bool available; time_t time_to_evict; } users; @@ -58,11 +65,6 @@ struct rrdengine_datafile { } extent_queries; }; -typedef enum __attribute__ ((__packed__)) { - DATAFILE_ACQUIRE_OPEN_CACHE = 0, - DATAFILE_ACQUIRE_PAGE_DETAILS = 1, -} DATAFILE_ACQUIRE_REASONS; - void datafile_acquire_dup(struct rrdengine_datafile *df); bool datafile_acquire(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason); void datafile_release(struct rrdengine_datafile *df, DATAFILE_ACQUIRE_REASONS reason); diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c index 5ad383d850..0ba0b9b440 100644 --- a/database/engine/journalfile.c +++ b/database/engine/journalfile.c @@ -652,7 +652,7 @@ static int check_journal_v2_file(void *data_start, size_t file_size, uint32_t or } metric++; - if (((uint8_t *) metric - (uint8_t *) data_start) > (uint32_t) file_size) { + if ((uint32_t)((uint8_t *) metric - (uint8_t *) data_start) > (uint32_t) file_size) { info("DBENGINE: verification failed EOF reached -- total entries %u, verified %u", entries, verified); return 1; } @@ -919,13 +919,13 @@ void do_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_ struct rrdengine_instance *ctx = (struct rrdengine_instance *) section; struct rrdengine_journalfile *journalfile = (struct rrdengine_journalfile *) user_data; struct rrdengine_datafile *datafile = journalfile->datafile; - time_t min_time_s = LLONG_MAX; + time_t min_time_s = LONG_MAX; time_t max_time_s = 0; struct jv2_metrics_info *metric_info; generate_journalfilepath_v2(datafile, path, sizeof(path)); - info("DBENGINE: indexing file '%s': extents %lu, metrics %lu, pages %lu", + info("DBENGINE: indexing file '%s': extents %zu, metrics %zu, pages %zu", path, number_of_extents, number_of_metrics, @@ -1062,7 +1062,7 @@ void do_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_ // Calculate start of the pages start for next descriptor pages_offset += (metric_info->number_of_pages * (sizeof(struct journal_page_list)) + sizeof(struct journal_page_header) + sizeof(struct journal_v2_block_trailer)); // Verify we are at the right location - if (pages_offset != (next_page_address - data_start)) { + if (pages_offset != (uint32_t)(next_page_address - data_start)) { // make sure checks fail so that we abort data = data_start; break; @@ -1092,7 +1092,7 @@ void do_migrate_to_v2_callback(Word_t section, unsigned datafile_fileno __maybe_ internal_error(true, "DBENGINE: FILE COMPLETED --------> %llu", (now_realtime_usec() - start_loading) / USEC_PER_MS); - info("DBENGINE: migrated journal file '%s', file size %lu", path, total_file_size); + info("DBENGINE: migrated journal file '%s', file size %zu", path, total_file_size); SET_JOURNAL_DATA(journalfile, data_start); SET_JOURNAL_DATA_SIZE(journalfile, total_file_size); diff --git a/database/engine/metric.c b/database/engine/metric.c index f468da6aef..f34aaed246 100644 --- a/database/engine/metric.c +++ b/database/engine/metric.c @@ -331,11 +331,11 @@ time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metric) { bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) { netdata_spinlock_lock(&metric->timestamps_lock); - internal_fatal(latest_time_s > now_realtime_sec() + 1, - "DBENGINE METRIC: metric latest time is in the future"); +// internal_fatal(latest_time_s > now_realtime_sec() + 1, +// "DBENGINE METRIC: metric latest time is in the future"); - internal_fatal(metric->latest_time_s_clean > latest_time_s, - "DBENGINE METRIC: metric new clean latest time is older than the previous one"); +// internal_fatal(metric->latest_time_s_clean > latest_time_s, +// "DBENGINE METRIC: metric new clean latest time is older than the previous one"); metric->latest_time_s_clean = latest_time_s; diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index 2d8b56f275..0c3271d519 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -440,6 +440,7 @@ static size_t list_has_time_gaps( (*pages_pending)++; if (pd->status & PDC_PAGE_DISK_PENDING) { + internal_fatal(pd->status & PDC_PAGE_SKIP, "page is disk pending and skipped"); internal_fatal(!pd->datafile.ptr, "datafile is NULL"); internal_fatal(!pd->datafile.extent.bytes, "datafile.extent.bytes zero"); internal_fatal(!pd->datafile.extent.pos, "datafile.extent.pos is zero"); @@ -633,7 +634,7 @@ static Pvoid_t get_page_list( pass1_ut = now_monotonic_usec(); size_t pages_pass1 = get_page_list_from_pgc(main_cache, metric, ctx, wanted_start_time_s, wanted_end_time_s, &JudyL_page_array, &cache_gaps, - false, PDC_PAGE_PRELOADED_PASS1 | PDC_PAGE_SOURCE_MAIN_CACHE); + false, PDC_PAGE_SOURCE_MAIN_CACHE); query_gaps += cache_gaps; pages_found_in_main_cache += pages_pass1; pages_total += pages_pass1; diff --git a/database/engine/pdc.c b/database/engine/pdc.c index f0ccf20c1c..240d6d35aa 100644 --- a/database/engine/pdc.c +++ b/database/engine/pdc.c @@ -12,6 +12,7 @@ struct extent_page_details_list { struct rrdengine_datafile *datafile; struct rrdeng_cmd *cmd; + bool head_to_datafile_extent_queries_pending_for_extent; struct { struct extent_page_details_list *prev; @@ -59,18 +60,24 @@ static struct { }, }; -void pdc_cleanup(void) { - netdata_spinlock_lock(&pdc_globals.protected.spinlock); +void pdc_cleanup1(void) { + PDC *item = NULL; + + if(!netdata_spinlock_trylock(&pdc_globals.protected.spinlock)) + return; - while(pdc_globals.protected.available_items && pdc_globals.protected.available > (size_t)libuv_worker_threads) { - PDC *item = pdc_globals.protected.available_items; + if(pdc_globals.protected.available_items && pdc_globals.protected.available > (size_t)libuv_worker_threads) { + item = pdc_globals.protected.available_items; DOUBLE_LINKED_LIST_REMOVE_UNSAFE(pdc_globals.protected.available_items, item, cache.prev, cache.next); - freez(item); pdc_globals.protected.available--; - __atomic_sub_fetch(&pdc_globals.atomics.allocated, 1, __ATOMIC_RELAXED); } netdata_spinlock_unlock(&pdc_globals.protected.spinlock); + + if(item) { + freez(item); + __atomic_sub_fetch(&pdc_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } } PDC *pdc_get(void) { @@ -132,18 +139,24 @@ static struct { }, }; -void page_details_cleanup(void) { - netdata_spinlock_lock(&page_details_globals.protected.spinlock); +void page_details_cleanup1(void) { + struct page_details *item = NULL; - while(page_details_globals.protected.available_items && page_details_globals.protected.available > (size_t)libuv_worker_threads * 2) { - struct page_details *item = page_details_globals.protected.available_items; + if(!netdata_spinlock_trylock(&page_details_globals.protected.spinlock)) + return; + + if(page_details_globals.protected.available_items && page_details_globals.protected.available > (size_t)libuv_worker_threads * 2) { + item = page_details_globals.protected.available_items; DOUBLE_LINKED_LIST_REMOVE_UNSAFE(page_details_globals.protected.available_items, item, cache.prev, cache.next); - freez(item); page_details_globals.protected.available--; - __atomic_sub_fetch(&page_details_globals.atomics.allocated, 1, __ATOMIC_RELAXED); } netdata_spinlock_unlock(&page_details_globals.protected.spinlock); + + if(item) { + freez(item); + __atomic_sub_fetch(&page_details_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } } struct page_details *page_details_get(void) { @@ -205,18 +218,24 @@ static struct { }, }; -void epdl_cleanup(void) { - netdata_spinlock_lock(&epdl_globals.protected.spinlock); +void epdl_cleanup1(void) { + EPDL *item = NULL; - while(epdl_globals.protected.available_items && epdl_globals.protected.available > 100) { - EPDL *item = epdl_globals.protected.available_items; + if(!netdata_spinlock_trylock(&epdl_globals.protected.spinlock)) + return; + + if(epdl_globals.protected.available_items && epdl_globals.protected.available > 100) { + item = epdl_globals.protected.available_items; DOUBLE_LINKED_LIST_REMOVE_UNSAFE(epdl_globals.protected.available_items, item, cache.prev, cache.next); - freez(item); epdl_globals.protected.available--; - __atomic_sub_fetch(&epdl_globals.atomics.allocated, 1, __ATOMIC_RELAXED); } netdata_spinlock_unlock(&epdl_globals.protected.spinlock); + + if(item) { + freez(item); + __atomic_sub_fetch(&epdl_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } } static EPDL *epdl_get(void) { @@ -278,18 +297,24 @@ static struct { }, }; -void deol_cleanup(void) { - netdata_spinlock_lock(&deol_globals.protected.spinlock); +void deol_cleanup1(void) { + DEOL *item = NULL; - while(deol_globals.protected.available_items && deol_globals.protected.available > 100) { - DEOL *item = deol_globals.protected.available_items; + if(!netdata_spinlock_trylock(&deol_globals.protected.spinlock)) + return; + + if(deol_globals.protected.available_items && deol_globals.protected.available > 100) { + item = deol_globals.protected.available_items; DOUBLE_LINKED_LIST_REMOVE_UNSAFE(deol_globals.protected.available_items, item, cache.prev, cache.next); - freez(item); deol_globals.protected.available--; - __atomic_sub_fetch(&deol_globals.atomics.allocated, 1, __ATOMIC_RELAXED); } netdata_spinlock_unlock(&deol_globals.protected.spinlock); + + if(item) { + freez(item); + __atomic_sub_fetch(&deol_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } } static DEOL *deol_get(void) { @@ -366,20 +391,26 @@ void extent_buffer_init(void) { extent_buffer_globals.max_size = max_size; } -void extent_buffer_cleanup(void) { - netdata_spinlock_lock(&extent_buffer_globals.protected.spinlock); +void extent_buffer_cleanup1(void) { + struct extent_buffer *item = NULL; - while(extent_buffer_globals.protected.available_items && extent_buffer_globals.protected.available > 1) { - struct extent_buffer *item = extent_buffer_globals.protected.available_items; - size_t bytes = sizeof(struct extent_buffer) + item->bytes; + if(!netdata_spinlock_trylock(&extent_buffer_globals.protected.spinlock)) + return; + + if(extent_buffer_globals.protected.available_items && extent_buffer_globals.protected.available > 1) { + item = extent_buffer_globals.protected.available_items; DOUBLE_LINKED_LIST_REMOVE_UNSAFE(extent_buffer_globals.protected.available_items, item, cache.prev, cache.next); - freez(item); extent_buffer_globals.protected.available--; - __atomic_sub_fetch(&extent_buffer_globals.atomics.allocated, 1, __ATOMIC_RELAXED); - __atomic_sub_fetch(&extent_buffer_globals.atomics.allocated_bytes, bytes, __ATOMIC_RELAXED); } netdata_spinlock_unlock(&extent_buffer_globals.protected.spinlock); + + if(item) { + size_t bytes = sizeof(struct extent_buffer) + item->bytes; + freez(item); + __atomic_sub_fetch(&extent_buffer_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + __atomic_sub_fetch(&extent_buffer_globals.atomics.allocated_bytes, bytes, __ATOMIC_RELAXED); + } } struct extent_buffer *extent_buffer_get(size_t size) { @@ -462,7 +493,7 @@ static void epdl_mark_all_not_loaded_pages_as_failed(EPDL *epdl, PDC_PAGE_STATUS while ((PValue = PDCJudyLFirstThenNext(*pd_by_start_time_s_JudyL, &start_time_index, &start_time_first))) { struct page_details *pd = *PValue; - if(!pd->page) { + if(!pd->page && !pdc_page_status_check(pd, PDC_PAGE_FAILED|PDC_PAGE_READY)) { pdc_page_status_set(pd, PDC_PAGE_FAILED | tags); pages_matched++; } @@ -522,7 +553,7 @@ static void pdc_destroy(PDC *pdc) { struct page_details *pd; Word_t time_index = 0; bool first_then_next = true; - size_t unroutable = 0; + size_t unroutable = 0, cancelled = 0; while((PValue = PDCJudyLFirstThenNext(pdc->page_list_JudyL, &time_index, &first_then_next))) { pd = *PValue; @@ -536,10 +567,12 @@ static void pdc_destroy(PDC *pdc) { internal_fatal(pd->datafile.ptr, "DBENGINE: page details has a datafile.ptr that is not released."); - if(!pd->page && !(status & (PDC_PAGE_READY | PDC_PAGE_FAILED | PDC_PAGE_RELEASED | PDC_PAGE_SKIP | PDC_PAGE_INVALID))) { + if(!pd->page && !(status & (PDC_PAGE_READY | PDC_PAGE_FAILED | PDC_PAGE_RELEASED | PDC_PAGE_SKIP | PDC_PAGE_INVALID | PDC_PAGE_CANCELLED))) { // pdc_page_status_set(pd, PDC_PAGE_FAILED); unroutable++; } + else if(!pd->page && (status & PDC_PAGE_CANCELLED)) + cancelled++; if(pd->page && !(status & PDC_PAGE_RELEASED)) { pgc_page_release(main_cache, pd->page); @@ -557,6 +590,9 @@ static void pdc_destroy(PDC *pdc) { if(unroutable) __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_load_fail_unroutable, unroutable, __ATOMIC_RELAXED); + + if(cancelled) + __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_load_fail_cancelled, cancelled, __ATOMIC_RELAXED); } void pdc_acquire(PDC *pdc) { @@ -619,10 +655,13 @@ static bool epdl_pending_add(EPDL *epdl) { EPDL *base = *PValue; - if(!base) + if(!base) { added_new = true; + epdl->head_to_datafile_extent_queries_pending_for_extent = true; + } else { added_new = false; + epdl->head_to_datafile_extent_queries_pending_for_extent = false; __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_load_extent_merged, 1, __ATOMIC_RELAXED); if(base->pdc->priority > epdl->pdc->priority) @@ -639,9 +678,12 @@ static bool epdl_pending_add(EPDL *epdl) { static void epdl_pending_del(EPDL *epdl) { netdata_spinlock_lock(&epdl->datafile->extent_queries.spinlock); - int rc = JudyLDel(&epdl->datafile->extent_queries.pending_epdl_by_extent_offset_judyL, epdl->extent_offset, PJE0); - (void)rc; - internal_fatal(!rc, "DBENGINE: epdl not found in pending list"); + if(epdl->head_to_datafile_extent_queries_pending_for_extent) { + epdl->head_to_datafile_extent_queries_pending_for_extent = false; + int rc = JudyLDel(&epdl->datafile->extent_queries.pending_epdl_by_extent_offset_judyL, epdl->extent_offset, PJE0); + (void) rc; + internal_fatal(!rc, "DBENGINE: epdl not found in pending list"); + } netdata_spinlock_unlock(&epdl->datafile->extent_queries.spinlock); } @@ -858,22 +900,32 @@ inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_ return vd; } -static struct page_details *epdl_to_pd_load_list(EPDL *epdl, Word_t metric_id, time_t start_time_s) { +static inline struct page_details *epdl_get_pd_load_link_list_from_metric_start_time(EPDL *epdl, Word_t metric_id, time_t start_time_s) { + + if(unlikely(epdl->head_to_datafile_extent_queries_pending_for_extent)) + // stop appending more pages to this epdl + epdl_pending_del(epdl); + struct page_details *pd_list = NULL; for(EPDL *ep = epdl; ep ;ep = ep->query.next) { Pvoid_t *pd_by_start_time_s_judyL = PDCJudyLGet(ep->page_details_by_metric_id_JudyL, metric_id, PJE0); internal_fatal(pd_by_start_time_s_judyL == PJERR, "DBENGINE: corrupted extent metrics JudyL"); - if (pd_by_start_time_s_judyL && *pd_by_start_time_s_judyL) { + if (unlikely(pd_by_start_time_s_judyL && *pd_by_start_time_s_judyL)) { Pvoid_t *pd_pptr = PDCJudyLGet(*pd_by_start_time_s_judyL, start_time_s, PJE0); internal_fatal(pd_pptr == PJERR, "DBENGINE: corrupted metric page details JudyHS"); - if (pd_pptr && *pd_pptr) { + if(likely(pd_pptr && *pd_pptr)) { struct page_details *pd = *pd_pptr; internal_fatal(metric_id != pd->metric_id, "DBENGINE: metric ids do not match"); - DOUBLE_LINKED_LIST_APPEND_UNSAFE(pd_list, pd, load.prev, load.next); + if(likely(!pd->page)) { + if (unlikely(__atomic_load_n(&ep->pdc->workers_should_stop, __ATOMIC_RELAXED))) + pdc_page_status_set(pd, PDC_PAGE_FAILED | PDC_PAGE_CANCELLED); + else + DOUBLE_LINKED_LIST_APPEND_UNSAFE(pd_list, pd, load.prev, load.next); + } } } } @@ -904,6 +956,14 @@ static bool epdl_populate_pages_from_extent_data( bool can_use_data = true; if(data_length < sizeof(*header) + sizeof(header->descr[0]) + sizeof(*trailer)) { can_use_data = false; + + // added to satisfy the requirements of older compilers (prevent warnings) + payload_length = 0; + payload_offset = 0; + trailer_offset = 0; + count = 0; + header = NULL; + trailer = NULL; } else { header = data; @@ -1006,7 +1066,7 @@ static bool epdl_populate_pages_from_extent_data( } mrg_metric_release(main_mrg, metric); - struct page_details *pd_list = epdl_to_pd_load_list(epdl, metric_id, start_time_s); + struct page_details *pd_list = epdl_get_pd_load_link_list_from_metric_start_time(epdl, metric_id, start_time_s); if(likely(!pd_list)) continue; @@ -1114,7 +1174,8 @@ static bool epdl_populate_pages_from_extent_data( } void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *epdl, bool worker) { - epdl_pending_del(epdl); + size_t *statistics_counter = NULL; + PDC_PAGE_STATUS not_loaded_pages_tag = 0, loaded_pages_tag = 0; bool should_stop = __atomic_load_n(&epdl->pdc->workers_should_stop, __ATOMIC_RELAXED); for(EPDL *ep = epdl->query.next; ep ;ep = ep->query.next) { @@ -1123,20 +1184,21 @@ void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *e internal_fatal(ep->extent_size != epdl->extent_size, "DBENGINE: extent sizes do not match"); internal_fatal(ep->file != epdl->file, "DBENGINE: files do not match"); - PDC *pdc = ep->pdc; - if(!__atomic_load_n(&pdc->workers_should_stop, __ATOMIC_RELAXED)) { + if(!__atomic_load_n(&ep->pdc->workers_should_stop, __ATOMIC_RELAXED)) { should_stop = false; break; } } - if(should_stop) + if(unlikely(should_stop)) { + statistics_counter = &rrdeng_cache_efficiency_stats.pages_load_fail_cancelled; + not_loaded_pages_tag = PDC_PAGE_CANCELLED; goto cleanup; + } if(worker) worker_is_busy(UV_EVENT_EXTENT_CACHE); - PDC_PAGE_STATUS not_loaded_pages_tag = 0, loaded_pages_tag = 0; bool extent_found_in_cache = false; void *extent_compressed_data = NULL; @@ -1150,8 +1212,8 @@ void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *e internal_fatal(epdl->extent_size != pgc_page_data_size(extent_cache, extent_cache_page), "DBENGINE: cache size does not match the expected size"); - loaded_pages_tag |= PDC_PAGE_LOADED_FROM_EXTENT_CACHE; - not_loaded_pages_tag |= PDC_PAGE_LOADED_FROM_EXTENT_CACHE; + loaded_pages_tag |= PDC_PAGE_EXTENT_FROM_CACHE; + not_loaded_pages_tag |= PDC_PAGE_EXTENT_FROM_CACHE; extent_found_in_cache = true; } else { @@ -1194,8 +1256,8 @@ void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *e extent_compressed_data = pgc_page_data(extent_cache_page); - loaded_pages_tag |= PDC_PAGE_LOADED_FROM_DISK; - not_loaded_pages_tag |= PDC_PAGE_LOADED_FROM_DISK; + loaded_pages_tag |= PDC_PAGE_EXTENT_FROM_DISK; + not_loaded_pages_tag |= PDC_PAGE_EXTENT_FROM_DISK; } } @@ -1208,26 +1270,33 @@ void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *e if(extent_used) { // since the extent was used, all the pages that are not // loaded from this extent, were not found in the extent - not_loaded_pages_tag |= PDC_PAGE_FAILED_UUID_NOT_IN_EXTENT; + not_loaded_pages_tag |= PDC_PAGE_FAILED_NOT_IN_EXTENT; + statistics_counter = &rrdeng_cache_efficiency_stats.pages_load_fail_not_found; } - else + else { not_loaded_pages_tag |= PDC_PAGE_FAILED_INVALID_EXTENT; + statistics_counter = &rrdeng_cache_efficiency_stats.pages_load_fail_invalid_extent; + } } - else + else { not_loaded_pages_tag |= PDC_PAGE_FAILED_TO_MAP_EXTENT; - + statistics_counter = &rrdeng_cache_efficiency_stats.pages_load_fail_cant_mmap_extent; + } if(extent_cache_page) pgc_page_release(extent_cache, extent_cache_page); |