author     Costa Tsaousis <costa@netdata.cloud>        2023-02-02 00:14:35 +0200
committer  GitHub <noreply@github.com>                 2023-02-02 00:14:35 +0200
commit     55d1f00bb7c2403b451947b2a225b5d1f6be9183 (patch)
tree       043e57edb64b319b1eb6a883d6980fa2d9dd2c8e /database/engine
parent     2e56e2b87622a102aef876d297a3cd80d35028e5 (diff)
DBENGINE v2 - improvements part 12 (#14379)
* parallel initialization of tiers
* do not spawn multiple dbengine event loops
* user configurable dbengine parallel initialization
* size netdata based on the real CPU cores available on the system netdata runs on, not on the system monitored
* user-configurable system CPUs
* move cpuset parsing to os.c/.h (a counting sketch follows the commit message)
* fix replication of misaligned chart dimensions
* give a different path to each tier thread
* statically allocate the path into the initialization structure
* use aral for reusing dbengine pages
* dictionaries use ARAL for fixed-sized values
* fix compilation without internal checks
* journal v2 index uses aral
* test to see judy allocations
* judy allocations using aral
* Add config option to select if dbengine will use direct I/O (default is yes); a sketch of such a toggle follows the commit message
* V1 journalfiles will use uv_fs_read instead of mmap (respecting the direct I/O setting)
* Remove sqlite3IsMemdb as it is unused
* Fix compilation error when --disable-dbengine is used
* use aral for dbengine work_cmds
* changed aral API to support new features
* pgc and mrg aral overheads
* rrdeng opcodes using aral
* better structuring and naming
* dbengine query handles using aral
* page descriptors using aral
* remove obsolete linking
* extent io descriptors using aral
* aral keeps one last page alive
* add missing return value
* added judy aral overhead
* pdc now uses aral
* page_details now use aral
* epdl and deol using aral - make sure ARALs are initialized before spawning the event loop
* remove unused linking
* pgc now uses one aral per partition (a per-partition allocator sketch follows the commit message)
* aral measures the maximum allocation queue
* aral to allocate pages in parallel
* aral parallel page allocation when needed
* aral cleanup
* track page allocation and page population separately
---------
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
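
Several items above concern sizing netdata from the CPU cores it is actually allowed to use (its own cpuset) rather than the cores of the monitored system. The sketch below is illustrative only: the real parser lives in os.c/os.h and is not shown in this diff, so count_cpuset_cpus is a hypothetical name. It counts the CPUs listed in a cgroup cpuset string such as "0-3,8,10-11".

```c
/* Hypothetical helper: count CPUs in a cgroup cpuset list string.
 * The real implementation in os.c may differ in name and behavior. */
#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long count_cpuset_cpus(const char *list) {
    long count = 0;
    char *copy = strdup(list), *saveptr = NULL;

    for (char *tok = strtok_r(copy, ",", &saveptr); tok; tok = strtok_r(NULL, ",", &saveptr)) {
        char *dash = strchr(tok, '-');
        if (dash) {
            long first = strtol(tok, NULL, 10);      /* "10-11" -> 10 */
            long last  = strtol(dash + 1, NULL, 10); /* "10-11" -> 11 */
            if (last >= first) count += last - first + 1;
        }
        else if (*tok)
            count += 1;                              /* single cpu, e.g. "8" */
    }

    free(copy);
    return count;
}

int main(void) {
    /* e.g. the contents of /sys/fs/cgroup/cpuset.cpus.effective */
    printf("%ld\n", count_cpuset_cpus("0-3,8,10-11")); /* prints 7 */
    return 0;
}
```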
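
The direct I/O option appears in the diff as open_file_for_io(path, flags, &file, use_direct_io) replacing open_file_direct_io(). The helper's body is not part of these hunks, so the following is only a plausible sketch of such a toggle, using plain POSIX open() and a hypothetical name; it also falls back to buffered I/O on filesystems that reject O_DIRECT.

```c
/* Plausible sketch only: toggle O_DIRECT based on a user setting.
 * The actual open_file_for_io() in netdata may behave differently. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <errno.h>

static int open_for_io_sketch(const char *path, int oflags, int use_direct_io) {
    int flags = oflags;

#ifdef O_DIRECT
    if (use_direct_io)
        flags |= O_DIRECT;  /* bypass the page cache; buffers must be aligned */
#endif

    int fd = open(path, flags, 0640);

#ifdef O_DIRECT
    /* some filesystems (e.g. tmpfs) reject O_DIRECT; retry buffered */
    if (fd == -1 && use_direct_io && errno == EINVAL)
        fd = open(path, oflags, 0640);
#endif

    return fd;
}
```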
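
The "one aral per partition" change is visible in the cache.c hunk: the single ARAL *aral becomes ARAL **aral, one allocator per index partition, with pages allocated via aral_mallocz(cache->aral[partition]) and released via aral_freez(cache->aral[partition], page). The code below is not netdata's ARAL API; it is a minimal, hypothetical fixed-size free-list arena per partition, showing why the split reduces contention: each partition allocates and frees page structures under its own lock, and freed elements are kept for reuse instead of being returned to libc.

```c
/* Minimal sketch, not netdata's ARAL: one fixed-size free-list arena
 * per cache partition, so partitions never contend on one allocator. */
#include <pthread.h>
#include <stdlib.h>

typedef struct free_node { struct free_node *next; } free_node;

typedef struct part_arena {
    pthread_mutex_t lock;
    size_t element_size;   /* every allocation has the same size */
    free_node *free_list;  /* previously freed elements, kept for reuse */
} part_arena;

static void arena_init(part_arena *a, size_t element_size) {
    pthread_mutex_init(&a->lock, NULL);
    a->element_size = (element_size < sizeof(free_node)) ? sizeof(free_node) : element_size;
    a->free_list = NULL;
}

static void *arena_alloc(part_arena *a) {
    pthread_mutex_lock(&a->lock);
    free_node *n = a->free_list;
    if (n) a->free_list = n->next;
    pthread_mutex_unlock(&a->lock);
    return n ? (void *)n : malloc(a->element_size);
}

static void arena_free(part_arena *a, void *p) {
    free_node *n = (free_node *)p;
    pthread_mutex_lock(&a->lock);
    n->next = a->free_list;   /* keep it for the next arena_alloc() */
    a->free_list = n;
    pthread_mutex_unlock(&a->lock);
}

/* One arena per partition, selected with the same partition index that
 * guards the page index. */
static part_arena *arenas_create(size_t partitions, size_t page_struct_size) {
    part_arena *arenas = calloc(partitions, sizeof(part_arena));
    for (size_t i = 0; i < partitions; i++)
        arena_init(&arenas[i], page_struct_size);
    return arenas;
}
```

In the real code the partition is chosen with pgc_indexing_partition(cache, entry->metric_id), which is why the page_add() hunk now computes the partition before allocating the page.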
Diffstat (limited to 'database/engine')
-rw-r--r--  database/engine/cache.c          89
-rw-r--r--  database/engine/cache.h           4
-rw-r--r--  database/engine/datafile.c        6
-rw-r--r--  database/engine/journalfile.c    58
-rw-r--r--  database/engine/journalfile.h     1
-rw-r--r--  database/engine/metric.c         26
-rw-r--r--  database/engine/metric.h          2
-rw-r--r--  database/engine/pagecache.c     126
-rw-r--r--  database/engine/pagecache.h       7
-rw-r--r--  database/engine/pdc.c           321
-rw-r--r--  database/engine/pdc.h             8
-rw-r--r--  database/engine/rrdengine.c     666
-rw-r--r--  database/engine/rrdengine.h      21
-rwxr-xr-x  database/engine/rrdengineapi.c   23
-rw-r--r--  database/engine/rrdengineapi.h    5
15 files changed, 423 insertions, 940 deletions
diff --git a/database/engine/cache.c b/database/engine/cache.c index 9292bc5354..4091684b29 100644 --- a/database/engine/cache.c +++ b/database/engine/cache.c @@ -106,7 +106,7 @@ struct pgc { } config; #ifdef PGC_WITH_ARAL - ARAL *aral; + ARAL **aral; #endif PGC_CACHE_LINE_PADDING(0); @@ -311,7 +311,7 @@ static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { if(unlikely(wanted_cache_size < referenced_size * 2 / 3)) wanted_cache_size = referenced_size * 2 / 3; - current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); + current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); // + pgc_aral_overhead(); per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size); @@ -430,7 +430,7 @@ static void pgc_section_pages_static_aral_init(void) { "pgc_section", sizeof(struct section_pages), 0, - 4096, + 65536, NULL, NULL, NULL, false, false); netdata_spinlock_unlock(&spinlock); @@ -851,7 +851,7 @@ static inline bool acquired_page_get_for_deletion_or_release_it(PGC *cache __may // ---------------------------------------------------------------------------- // Indexing -static inline void free_this_page(PGC *cache, PGC_PAGE *page) { +static inline void free_this_page(PGC *cache, PGC_PAGE *page, size_t partition __maybe_unused) { // call the callback to free the user supplied memory cache->config.pgc_free_clean_cb(cache, (PGC_ENTRY){ .section = page->section, @@ -874,7 +874,7 @@ static inline void free_this_page(PGC *cache, PGC_PAGE *page) { // free our memory #ifdef PGC_WITH_ARAL - aral_freez(cache->aral, page); + aral_freez(cache->aral[partition], page); #else freez(page); #endif @@ -942,7 +942,7 @@ static inline void remove_and_free_page_not_in_any_queue_and_acquired_for_deleti pgc_index_write_lock(cache, partition); remove_this_page_from_index_unsafe(cache, page, partition); pgc_index_write_unlock(cache, partition); - free_this_page(cache, page); + free_this_page(cache, page, partition); } static inline bool make_acquired_page_clean_and_evict_or_page_release(PGC *cache, PGC_PAGE *page) { @@ -1137,7 +1137,7 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic next = page->link.next; size_t page_size = page->assumed_size; - free_this_page(cache, page); + free_this_page(cache, page, partition); __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED); __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED); @@ -1156,7 +1156,7 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic pgc_index_write_lock(cache, partition); remove_this_page_from_index_unsafe(cache, page, partition); pgc_index_write_unlock(cache, partition); - free_this_page(cache, page); + free_this_page(cache, page, partition); __atomic_sub_fetch(&cache->stats.evicting_entries, 1, __ATOMIC_RELAXED); __atomic_sub_fetch(&cache->stats.evicting_size, page_size, __ATOMIC_RELAXED); @@ -1191,8 +1191,10 @@ premature_exit: static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) { __atomic_add_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED); + size_t partition = pgc_indexing_partition(cache, entry->metric_id); + #ifdef PGC_WITH_ARAL - PGC_PAGE *allocation = aral_mallocz(cache->aral); + PGC_PAGE *allocation = aral_mallocz(cache->aral[partition]); #endif PGC_PAGE *page; size_t spins = 0; @@ -1201,7 +1203,6 @@ static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) { if(++spins > 1) 
__atomic_add_fetch(&cache->stats.insert_spins, 1, __ATOMIC_RELAXED); - size_t partition = pgc_indexing_partition(cache, entry->metric_id); pgc_index_write_lock(cache, partition); size_t mem_before_judyl = 0, mem_after_judyl = 0; @@ -1299,7 +1300,7 @@ static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) { #ifdef PGC_WITH_ARAL if(allocation) - aral_freez(cache->aral, allocation); + aral_freez(cache->aral[partition], allocation); #endif __atomic_sub_fetch(&cache->stats.workers_add, 1, __ATOMIC_RELAXED); @@ -1757,7 +1758,7 @@ PGC *pgc_create(const char *name, cache->config.max_pages_per_inline_eviction = (max_pages_per_inline_eviction < 2) ? 2 : max_pages_per_inline_eviction; cache->config.max_skip_pages_per_inline_eviction = (max_skip_pages_per_inline_eviction < 2) ? 2 : max_skip_pages_per_inline_eviction; cache->config.max_flushes_inline = (max_flushes_inline < 1) ? 1 : max_flushes_inline; - cache->config.partitions = partitions < 1 ? (size_t)get_system_cpus() : partitions; + cache->config.partitions = partitions < 1 ? (size_t)get_netdata_cpus() : partitions; cache->config.additional_bytes_per_page = additional_bytes_per_page; cache->config.max_workers_evict_inline = max_inline_evictors; @@ -1787,20 +1788,40 @@ PGC *pgc_create(const char *name, cache->clean.linked_list_in_sections_judy = false; cache->clean.stats = &cache->stats.queues.clean; + pgc_section_pages_static_aral_init(); + #ifdef PGC_WITH_ARAL - cache->aral = aral_create(name, - sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page, - 0, - 4096, - NULL, NULL, false, false); + cache->aral = callocz(cache->config.partitions, sizeof(ARAL *)); + for(size_t part = 0; part < cache->config.partitions ; part++) { + char buf[100 +1]; + snprintfz(buf, 100, "%s[%zu]", name, part); + cache->aral[part] = aral_create( + buf, + sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page, + 0, + 16384, + aral_statistics(pgc_section_pages_aral), + NULL, NULL, false, false); + } #endif - pgc_section_pages_static_aral_init(); pointer_index_init(cache); return cache; } +struct aral_statistics *pgc_aral_statistics(void) { + return aral_statistics(pgc_section_pages_aral); +} + +size_t pgc_aral_structures(void) { + return aral_structures(pgc_section_pages_aral); +} + +size_t pgc_aral_overhead(void) { + return aral_overhead(pgc_section_pages_aral); +} + void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) { all_hot_pages_to_dirty(cache, section); @@ -1822,9 +1843,17 @@ void pgc_destroy(PGC *cache) { error("DBENGINE CACHE: there are %zu referenced cache pages - leaving the cache allocated", PGC_REFERENCED_PAGES(cache)); else { pointer_destroy_index(cache); + + for(size_t part = 0; part < cache->config.partitions ; part++) + netdata_rwlock_destroy(&cache->index[part].rwlock); + #ifdef PGC_WITH_ARAL - aral_destroy(cache->aral); + for(size_t part = 0; part < cache->config.partitions ; part++) + aral_destroy(cache->aral[part]); + + freez(cache->aral); #endif + freez(cache); } } @@ -2059,6 +2088,10 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ return; } + ARAL *ar_mi = aral_by_size_acquire(sizeof(struct jv2_metrics_info)); + ARAL *ar_pi = aral_by_size_acquire(sizeof(struct jv2_page_info)); + ARAL *ar_ei = aral_by_size_acquire(sizeof(struct jv2_extents_info)); + for(PGC_PAGE *page = sp->base; page ; page = page->link.next) { struct extent_io_data *xio = (struct extent_io_data *)page->custom_data; if(xio->fileno != datafile_fileno) continue; @@ -2091,7 +2124,7 @@ void 
pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ struct jv2_extents_info *ei; if(!*PValue) { - ei = callocz(1, sizeof(struct jv2_extents_info)); + ei = aral_mallocz(ar_ei); // callocz(1, sizeof(struct jv2_extents_info)); ei->pos = xio->pos; ei->bytes = xio->bytes; ei->number_of_pages = 1; @@ -2115,11 +2148,13 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ struct jv2_metrics_info *mi; if(!*PValue) { - mi = callocz(1, sizeof(struct jv2_metrics_info)); + mi = aral_mallocz(ar_mi); // callocz(1, sizeof(struct jv2_metrics_info)); mi->uuid = mrg_metric_uuid(main_mrg, (METRIC *)page->metric_id); mi->first_time_s = page->start_time_s; mi->last_time_s = page->end_time_s; mi->number_of_pages = 1; + mi->page_list_header = 0; + mi->JudyL_pages_by_start_time = NULL; *PValue = mi; count_of_unique_metrics++; @@ -2138,7 +2173,7 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ fatal("Corrupted JudyL metric pages"); if(!*PValue) { - struct jv2_page_info *pi = callocz(1, (sizeof(struct jv2_page_info))); + struct jv2_page_info *pi = aral_mallocz(ar_pi); // callocz(1, (sizeof(struct jv2_page_info))); pi->start_time_s = page->start_time_s; pi->end_time_s = page->end_time_s; pi->update_every_s = page->update_every_s; @@ -2182,11 +2217,11 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ page_transition_unlock(cache, pi->page); pgc_page_hot_to_dirty_and_release(cache, pi->page); // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page); - freez(pi); + aral_freez(ar_pi, pi); } JudyLFreeArray(&mi->JudyL_pages_by_start_time, PJE0); - freez(mi); + aral_freez(ar_mi, mi); } JudyLFreeArray(&JudyL_metrics, PJE0); } @@ -2197,11 +2232,15 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ Word_t extent_pos = 0; while ((PValue = JudyLFirstThenNext(JudyL_extents_pos, &extent_pos, &extent_pos_first))) { struct jv2_extents_info *ei = *PValue; - freez(ei); + aral_freez(ar_ei, ei); } JudyLFreeArray(&JudyL_extents_pos, PJE0); } + aral_by_size_release(ar_ei); + aral_by_size_release(ar_pi); + aral_by_size_release(ar_mi); + __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED); } diff --git a/database/engine/cache.h b/database/engine/cache.h index 740e94c065..65e6a61379 100644 --- a/database/engine/cache.h +++ b/database/engine/cache.h @@ -242,4 +242,8 @@ bool pgc_flush_pages(PGC *cache, size_t max_flushes); struct pgc_statistics pgc_get_statistics(PGC *cache); size_t pgc_hot_and_dirty_entries(PGC *cache); +struct aral_statistics *pgc_aral_statistics(void); +size_t pgc_aral_structures(void); +size_t pgc_aral_overhead(void); + #endif // DBENGINE_CACHE_H diff --git a/database/engine/datafile.c b/database/engine/datafile.c index cc0ed1354d..286ae1e30e 100644 --- a/database/engine/datafile.c +++ b/database/engine/datafile.c @@ -257,7 +257,7 @@ int create_data_file(struct rrdengine_datafile *datafile) char path[RRDENG_PATH_MAX]; generate_datafilepath(datafile, path, sizeof(path)); - fd = open_file_direct_io(path, O_CREAT | O_RDWR | O_TRUNC, &file); + fd = open_file_for_io(path, O_CREAT | O_RDWR | O_TRUNC, &file, use_direct_io); if (fd < 0) { ctx_fs_error(ctx); return fd; @@ -340,7 +340,7 @@ static int load_data_file(struct rrdengine_datafile *datafile) char path[RRDENG_PATH_MAX]; generate_datafilepath(datafile, path, sizeof(path)); - fd = open_file_direct_io(path, O_RDWR, &file); + fd = open_file_for_io(path, O_RDWR, &file, use_direct_io); if 
(fd < 0) { ctx_fs_error(ctx); return fd; @@ -392,7 +392,7 @@ static int scan_data_files(struct rrdengine_instance *ctx) { int ret; unsigned tier, no, matched_files, i,failed_to_load; - static uv_fs_t req; + uv_fs_t req; uv_dirent_t dent; struct rrdengine_datafile **datafiles, *datafile; struct rrdengine_journalfile *journalfile; diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c index 3f9c9e6fb2..9a9769321a 100644 --- a/database/engine/journalfile.c +++ b/database/engine/journalfile.c @@ -495,7 +495,7 @@ int journalfile_create(struct rrdengine_journalfile *journalfile, struct rrdengi char path[RRDENG_PATH_MAX]; journalfile_v1_generate_path(datafile, path, sizeof(path)); - fd = open_file_direct_io(path, O_CREAT | O_RDWR | O_TRUNC, &file); + fd = open_file_for_io(path, O_CREAT | O_RDWR | O_TRUNC, &file, use_direct_io); if (fd < 0) { ctx_fs_error(ctx); return fd; @@ -704,7 +704,7 @@ static unsigned journalfile_replay_transaction(struct rrdengine_instance *ctx, s static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, struct rrdengine_journalfile *journalfile) { uv_file file; - uint64_t file_size;//, data_file_size; + uint64_t file_size; int ret; uint64_t pos, pos_i, max_id, id; unsigned size_bytes; @@ -714,33 +714,26 @@ static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, file = journalfile->file; file_size = journalfile->unsafe.pos; - //data_file_size = journalfile->datafile->pos; TODO: utilize this? max_id = 1; - bool journal_is_mmapped = (journalfile->data != NULL); - if (unlikely(!journal_is_mmapped)) { - ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); - if (unlikely(ret)) - fatal("DBENGINE: posix_memalign:%s", strerror(ret)); - } - else - buf = journalfile->data + sizeof(struct rrdeng_jf_sb); - for (pos = sizeof(struct rrdeng_jf_sb) ; pos < file_size ; pos += READAHEAD_BYTES) { + ret = posix_memalign((void *)&buf, RRDFILE_ALIGNMENT, READAHEAD_BYTES); + if (unlikely(ret)) + fatal("DBENGINE: posix_memalign:%s", strerror(ret)); + + for (pos = sizeof(struct rrdeng_jf_sb); pos < file_size; pos += READAHEAD_BYTES) { size_bytes = MIN(READAHEAD_BYTES, file_size - pos); - if (unlikely(!journal_is_mmapped)) { - iov = uv_buf_init(buf, size_bytes); - ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL); - if (ret < 0) { - error("DBENGINE: uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret)); - uv_fs_req_cleanup(&req); - goto skip_file; - } - fatal_assert(req.result >= 0); + iov = uv_buf_init(buf, size_bytes); + ret = uv_fs_read(NULL, &req, file, &iov, 1, pos, NULL); + if (ret < 0) { + error("DBENGINE: uv_fs_read: pos=%" PRIu64 ", %s", pos, uv_strerror(ret)); uv_fs_req_cleanup(&req); - ctx_io_read_op_bytes(ctx, size_bytes); + goto skip_file; } + fatal_assert(req.result >= 0); + uv_fs_req_cleanup(&req); + ctx_io_read_op_bytes(ctx, size_bytes); - for (pos_i = 0 ; pos_i < size_bytes ; ) { + for (pos_i = 0; pos_i < size_bytes;) { unsigned max_size; max_size = pos + size_bytes - pos_i; @@ -752,12 +745,9 @@ static uint64_t journalfile_iterate_transactions(struct rrdengine_instance *ctx, pos_i += ret; max_id = MAX(max_id, id); } - if (likely(journal_is_mmapped)) - buf += size_bytes; } skip_file: - if (unlikely(!journal_is_mmapped)) - posix_memfree(buf); + posix_memfree(buf); return max_id; } @@ -1400,18 +1390,15 @@ int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfil uint64_t file_size, max_id; char path[RRDENG_PATH_MAX]; - // Do not try to load the latest file (always 
rebuild and live migrate) + // Do not try to load the latest file if (datafile->fileno != ctx_last_fileno_get(ctx)) { - if (!journalfile_v2_load(ctx, journalfile, datafile)) { -// unmap_journal_file(journalfile); + if (likely(!journalfile_v2_load(ctx, journalfile, datafile))) return 0; - } } journalfile_v1_generate_path(datafile, path, sizeof(path)); - // If it is not the last file, open read only - fd = open_file_direct_io(path, O_RDWR, &file); + fd = open_file_for_io(path, O_RDWR, &file, use_direct_io); if (fd < 0) { ctx_fs_error(ctx); return fd; @@ -1432,16 +1419,13 @@ int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfil journalfile->file = file; journalfile->unsafe.pos = file_size; - journalfile->data = netdata_mmap(path, file_size, MAP_SHARED, 0, !(datafile->fileno == ctx_last_fileno_get(ctx)), NULL); - info("DBENGINE: loading journal file '%s' using %s.", path, journalfile->data?"MMAP":"uv_fs_read"); + info("DBENGINE: loading journal file '%s'", path); max_id = journalfile_iterate_transactions(ctx, journalfile); __atomic_store_n(&ctx->atomic.transaction_id, MAX(__atomic_load_n(&ctx->atomic.transaction_id, __ATOMIC_RELAXED), max_id + 1), __ATOMIC_RELAXED); info("DBENGINE: journal file '%s' loaded (size:%"PRIu64").", path, file_size); - if (likely(journalfile->data)) - netdata_munmap(journalfile->data, file_size); bool is_last_file = (ctx_last_fileno_get(ctx) == journalfile->datafile->fileno); if (is_last_file && journalfile->datafile->pos <= rrdeng_target_data_file_size(ctx) / 3) { diff --git a/database/engine/journalfile.h b/database/engine/journalfile.h index fc63ad2994..5fbcc90fa7 100644 --- a/database/engine/journalfile.h +++ b/database/engine/journalfile.h @@ -47,7 +47,6 @@ struct rrdengine_journalfile { } unsafe; uv_file file; - void *data; struct rrdengine_datafile *datafile; }; diff --git a/database/engine/metric.c b/database/engine/metric.c index bad15b50c2..9dc9d9ebcf 100644 --- a/database/engine/metric.c +++ b/database/engine/metric.c @@ -24,6 +24,8 @@ struct metric { // YOU HAVE TO INITIALIZE IT YOURSELF ! 
}; +static struct aral_statistics mrg_aral_statistics; + struct mrg { ARAL *aral[MRG_PARTITIONS]; @@ -312,16 +314,18 @@ MRG *mrg_create(void) { MRG *mrg = callocz(1, sizeof(MRG)); for(size_t i = 0; i < MRG_PARTITIONS ; i++) { + netdata_rwlock_init(&mrg->index[i].rwlock); + char buf[ARAL_MAX_NAME + 1]; snprintfz(buf, ARAL_MAX_NAME, "mrg[%zu]", i); - netdata_rwlock_init(&mrg->index[i].rwlock); - mrg->aral[i] = aral_create("mrg", - sizeof(METRIC), - 0, - 512, - NULL, NULL, false, - false); + mrg->aral[i] = aral_create(buf, + sizeof(METRIC), + 0, + 16384, + &mrg_aral_statistics, + NULL, NULL, false, + false); } mrg->stats.size = sizeof(MRG); @@ -329,6 +333,14 @@ MRG *mrg_create(void) { return mrg; } +size_t mrg_aral_structures(void) { + return aral_structures_from_stats(&mrg_aral_statistics); +} + +size_t mrg_aral_overhead(void) { + return aral_overhead_from_stats(&mrg_aral_statistics); +} + void mrg_destroy(MRG *mrg __maybe_unused) { // no destruction possible // we can't traverse the metrics list diff --git a/database/engine/metric.h b/database/engine/metric.h index 0b0f17418c..82aff903a3 100644 --- a/database/engine/metric.h +++ b/database/engine/metric.h @@ -73,5 +73,7 @@ bool mrg_metric_set_writer(MRG *mrg, METRIC *metric); bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric); struct mrg_statistics mrg_get_statistics(MRG *mrg); +size_t mrg_aral_structures(void); +size_t mrg_aral_overhead(void); #endif // DBENGINE_METRIC_H diff --git a/database/engine/pagecache.c b/database/engine/pagecache.c index 90092d540f..b4902d784a 100644 --- a/database/engine/pagecache.c +++ b/database/engine/pagecache.c @@ -1055,77 +1055,67 @@ size_t dynamic_extent_cache_size(void) { return target_size; } -void init_page_cache(void) +void pgc_and_mrg_initialize(void) { - static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER; - static bool initialized = false; + main_mrg = mrg_create(); - netdata_spinlock_lock(&spinlock); - if (!initialized) { - initialized = true; + size_t target_cache_size = (size_t)default_rrdeng_page_cache_mb * 1024ULL * 1024ULL; + size_t main_cache_size = (target_cache_size / 100) * 95; + size_t open_cache_size = 0; + size_t extent_cache_size = (target_cache_size / 100) * 5; - main_mrg = mrg_create(); - - size_t target_cache_size = (size_t)default_rrdeng_page_cache_mb * 1024ULL * 1024ULL; - size_t main_cache_size = (target_cache_size / 100) * 95; - size_t open_cache_size = 0; - size_t extent_cache_size = (target_cache_size / 100) * 5; - - if(extent_cache_size < 3 * 1024 * 1024) { - extent_cache_size = 3 * 1024 * 1024; - main_cache_size = target_cache_size - extent_cache_size; - } - - main_cache = pgc_create( - "main_cache", - main_cache_size, - main_cache_free_clean_page_callback, - (size_t) rrdeng_pages_per_extent, - main_cache_flush_dirty_page_init_callback, - main_cache_flush_dirty_page_callback, - 10, - 10240, // if there are that many threads, evict so many at once! - 1000, // - 5, // don't delay too much other threads - PGC_OPTIONS_AUTOSCALE, // AUTOSCALE = 2x max hot pages - 0, // 0 = as many as the system cpus - 0 - ); - - open_cache = pgc_create( - "open_cache", - open_cache_size, // the default is 1MB - open_cache_free_clean_page_callback, - 1, - NULL, - open_cache_flush_dirty_page_callback, - 10, - 10240, // if there are that many threads, evict that many at once! 
- 1000, // - 3, // don't delay too much other threads - PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE, - 0, // 0 = as many as the system cpus - sizeof(struct extent_io_data) - ); - pgc_set_dynamic_target_cache_size_callback(open_cache, dynamic_open_cache_size); - - extent_cache = pgc_create( - "extent_cache", - extent_cache_size, - extent_cache_free_clean_page_callback, - 1, - NULL, - extent_cache_flush_dirty_page_callback, - 5, - 10, // it will lose up to that extents at once! - 100, // - 2, // don't delay too much other threads - PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE, - 0, // 0 = as many as the system cpus - 0 - ); - pgc_set_dynamic_target_cache_size_callback(extent_cache, dynamic_extent_cache_size); + if(extent_cache_size < 3 * 1024 * 1024) { + extent_cache_size = 3 * 1024 * 1024; + main_cache_size = target_cache_size - extent_cache_size; } - netdata_spinlock_unlock(&spinlock); + main_cache = pgc_create( + "main_cache", + main_cache_size, + main_cache_free_clean_page_callback, + (size_t) rrdeng_pages_per_extent, + main_cache_flush_dirty_page_init_callback, + main_cache_flush_dirty_page_callback, + 10, + 10240, // if there are that many threads, evict so many at once! + 1000, // + 5, // don't delay too much other threads + PGC_OPTIONS_AUTOSCALE, // AUTOSCALE = 2x max hot pages + 0, // 0 = as many as the system cpus + 0 + ); + + open_cache = pgc_create( + "open_cache", + open_cache_size, // the default is 1MB + open_cache_free_clean_page_callback, + 1, + NULL, + open_cache_flush_dirty_page_callback, + 10, + 10240, // if there are that many threads, evict that many at once! + 1000, // + 3, // don't delay too much other threads + PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE, + 0, // 0 = as many as the system cpus + sizeof(struct extent_io_data) + ); + pgc_set_dynamic_target_cache_size_callback(open_cache, dynamic_open_cache_size); + + extent_cache = pgc_create( + "extent_cache", + extent_cache_size, + extent_cache_free_clean_page_callback, + 1, + NULL, + extent_cache_flush_dirty_page_callback, + 5, + 10, // it will lose up to that extents at once! 
+ 100, // + 2, // don't delay too much other threads + PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_INLINE | PGC_OPTIONS_FLUSH_PAGES_INLINE, + 0, // 0 = as many as the system cpus + 0 + ); + pgc_set_dynamic_target_cache_size_callback(extent_cache, dynamic_extent_cache_size); } diff --git a/database/engine/pagecache.h b/database/engine/pagecache.h index 244ca0b361..9ab7db0786 100644 --- a/database/engine/pagecache.h +++ b/database/engine/pagecache.h @@ -33,11 +33,6 @@ struct page_descr_with_data { struct page_descr_with_data *prev; struct page_descr_with_data *next; } link; - - struct { - struct page_descr_with_data *prev; - struct page_descr_with_data *next; - } cache; }; #define PAGE_INFO_SCRATCH_SZ (8) @@ -62,7 +57,7 @@ void rrdeng_prep_wait(struct page_details_control *pdc); void rrdeng_prep_query(struct page_details_control *pdc); void pg_cache_preload(struct rrdeng_query_handle *handle); struct pgc_page *pg_cache_lookup_next(struct rrdengine_instance *ctx, struct page_details_control *pdc, time_t now_s, time_t last_update_every_s, size_t *entries); -void init_page_cache(void); +void pgc_and_mrg_initialize(void); void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, time_t update_every_s, struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length); diff --git a/database/engine/pdc.c b/database/engine/pdc.c index cf5081dbb3..b2f360e7c3 100644 --- a/database/engine/pdc.c +++ b/database/engine/pdc.c @@ -18,22 +18,12 @@ struct extent_page_details_list { struct extent_page_details_list *prev; struct extent_page_details_list *next; } query; - - struct { - struct extent_page_details_list *prev; - struct extent_page_details_list *next; - } cache; }; typedef struct datafile_extent_offset_list { uv_file file; unsigned fileno; Pvoid_t extent_pd_list_by_extent_offset_JudyL; - - struct { - struct datafile_extent_offset_list *prev; - struct datafile_extent_offset_list *next; - } cache; } DEOL; // ---------------------------------------------------------------------------- @@ -41,315 +31,129 @@ typedef struct datafile_extent_offset_list { static struct { struct { - SPINLOCK spinlock; - PDC *available_items; - size_t available; - } protected; + ARAL *ar; + } pdc; struct { - size_t allocated; - } atomics; -} pdc_globals = { - .protected = { - .spinlock = NETDATA_SPINLOCK_INITIALIZER, - .available_items = NULL, - .available = 0, - }, - .atomics = { - .allocated = 0, - }, -}; |