diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-01-25 01:56:49 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-25 01:56:49 +0200 |
commit | 3a430c181e7655a8460b40e9864395694f223e46 (patch) | |
tree | 538c69896374d1a8587d6f6719033c160014e650 /daemon | |
parent | 0c1fbbe591d5b99f747877feb02557354ff621b2 (diff) |
DBENGINE v2 - improvements part 8 (#14319)
* cache 100 pages for each size our tiers need
* smarter page caching
* account the caching structures
* dynamic max number of cached pages
* make variables const to ensure they are not changed
* make sure replication timestamps do not go to the future
* replication now sends chart and dimension states atomically; replication receivers ignore chart and dimension states when rbegin is also ignored
* make sure all pages are flushed on shutdown
* take into account empty points too
* when recalculating retention update first_time_s on metrics only when they are bigger
* Report the datafile number we use to recalculate retention
* Report the datafile number we use to recalculate retention
* rotate db at startup
* make query plans overlap
* Calculate first_time_s properly
* updated event labels
* negative page caching fix
* Attempt to create missing tables on query failure
* Attempt to create missing tables on query failure (part 2)
* negative page caching for all gaps, to eliminate jv2 scans
* Fix unittest
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/event_loop.c | 63 | ||||
-rw-r--r-- | daemon/event_loop.h | 61 | ||||
-rw-r--r-- | daemon/global_statistics.c | 5 | ||||
-rw-r--r-- | daemon/main.c | 19 |
4 files changed, 100 insertions, 48 deletions
diff --git a/daemon/event_loop.c b/daemon/event_loop.c index 1dde811466..cb101d9143 100644 --- a/daemon/event_loop.c +++ b/daemon/event_loop.c @@ -13,31 +13,46 @@ void register_libuv_worker_jobs() { registered = true; worker_register("LIBUV"); - worker_register_job_name(UV_EVENT_PAGE_LOOKUP, "page lookup"); - worker_register_job_name(UV_EVENT_PAGE_POPULATION, "populate page"); - worker_register_job_name(UV_EVENT_EXT_DECOMPRESSION, "extent decompression"); - worker_register_job_name(UV_EVENT_METADATA_STORE, "store host metadata"); - worker_register_job_name(UV_EVENT_JOURNAL_INDEX_WAIT, "journal v2 wait"); - worker_register_job_name(UV_EVENT_JOURNAL_INDEX, "journal v2 indexing"); - worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command"); - worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); - worker_register_job_name(UV_EVENT_EXTENT_CACHE, "extent cache"); - worker_register_job_name(UV_EVENT_EXTENT_MMAP, "extent mmap"); - worker_register_job_name(UV_EVENT_FLUSH_MAIN, "flush main"); - worker_register_job_name(UV_EVENT_EVICT_MAIN, "evict main"); - worker_register_job_name(UV_EVENT_ANALYZE_V2, "analyze journalfile"); - worker_register_job_name(UV_EVENT_RETENTION_V2, "calculate retention"); - worker_register_job_name(UV_EVENT_RETENTION_UPDATE, "update retention"); - worker_register_job_name(UV_EVENT_DATAFILE_ACQUIRE, "datafile acquire"); - worker_register_job_name(UV_EVENT_DATAFILE_DELETE, "datafile deletion"); - worker_register_job_name(UV_EVENT_FLUSHED_TO_OPEN, "flushed to open"); - worker_register_job_name(UV_EVENT_PREP_QUERY, "prep query"); + + // generic worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init"); - worker_register_job_name(UV_EVENT_FLUSH_PAGES, "flush pages"); - worker_register_job_name(UV_EVENT_BUFFERS_CLEANUP, "buffers cleanup"); - worker_register_job_name(UV_EVENT_QUIESCE, "quiesce"); - worker_register_job_name(UV_EVENT_POPULATE_MRG, "populate mrg"); - worker_register_job_name(UV_EVENT_SHUTDOWN, "shutdown"); + 
+ // query related + worker_register_job_name(UV_EVENT_DBENGINE_QUERY, "query"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, "extent cache"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_MMAP, "extent mmap"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, "extent decompression"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, "page lookup"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, "page populate"); + + // flushing related + worker_register_job_name(UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, "flush main"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_WRITE, "extent write"); + worker_register_job_name(UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, "flushed to open"); + + // datafile full + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, "jv2 index wait"); + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX, "jv2 indexing"); + + // db rotation related + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, "datafile delete wait"); + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE, "datafile deletion"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, "find rotated metrics"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, "find remaining retention"); + worker_register_job_name(UV_EVENT_DBENGINE_POPULATE_MRG, "update retention"); + + // other dbengine events + worker_register_job_name(UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, "evict main"); + worker_register_job_name(UV_EVENT_DBENGINE_BUFFERS_CLEANUP, "dbengine buffers cleanup"); + worker_register_job_name(UV_EVENT_DBENGINE_QUIESCE, "dbengine quiesce"); + worker_register_job_name(UV_EVENT_DBENGINE_SHUTDOWN, "dbengine shutdown"); + + // metadata + worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host"); + worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); + + // netdatacli + worker_register_job_name(UV_EVENT_SCHEDULE_CMD, 
"schedule command"); uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER"); } diff --git a/daemon/event_loop.h b/daemon/event_loop.h index 6b72dd4801..a539a39cfa 100644 --- a/daemon/event_loop.h +++ b/daemon/event_loop.h @@ -5,31 +5,46 @@ enum event_loop_job { UV_EVENT_JOB_NONE = 0, - UV_EVENT_EXT_DECOMPRESSION, - UV_EVENT_PAGE_LOOKUP, - UV_EVENT_PAGE_POPULATION, + + // generic + UV_EVENT_WORKER_INIT, + + // query related + UV_EVENT_DBENGINE_QUERY, + UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_MMAP, + UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, + UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, + + // flushing related + UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, + UV_EVENT_DBENGINE_EXTENT_WRITE, + UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, + + // datafile full + UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, + UV_EVENT_DBENGINE_JOURNAL_INDEX, + + // db rotation related + UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, + UV_EVENT_DBENGINE_DATAFILE_DELETE, + UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, // find the metrics that are rotated + UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, // find their remaining retention + UV_EVENT_DBENGINE_POPULATE_MRG, // update mrg + + // other dbengine events + UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, + UV_EVENT_DBENGINE_BUFFERS_CLEANUP, + UV_EVENT_DBENGINE_QUIESCE, + UV_EVENT_DBENGINE_SHUTDOWN, + + // metadata UV_EVENT_METADATA_STORE, - UV_EVENT_JOURNAL_INDEX_WAIT, - UV_EVENT_JOURNAL_INDEX, - UV_EVENT_SCHEDULE_CMD, UV_EVENT_METADATA_CLEANUP, - UV_EVENT_EXTENT_CACHE, - UV_EVENT_EXTENT_MMAP, - UV_EVENT_FLUSH_MAIN, - UV_EVENT_EVICT_MAIN, - UV_EVENT_ANALYZE_V2, - UV_EVENT_RETENTION_V2, - UV_EVENT_RETENTION_UPDATE, - UV_EVENT_DATAFILE_ACQUIRE, - UV_EVENT_DATAFILE_DELETE, - UV_EVENT_FLUSHED_TO_OPEN, - UV_EVENT_PREP_QUERY, - UV_EVENT_WORKER_INIT, - UV_EVENT_FLUSH_PAGES, - UV_EVENT_BUFFERS_CLEANUP, - UV_EVENT_QUIESCE, - UV_EVENT_POPULATE_MRG, - UV_EVENT_SHUTDOWN, + + // netdatacli + UV_EVENT_SCHEDULE_CMD, }; void 
register_libuv_worker_jobs(); diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 2422b5a3a4..cb89ed01d9 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -1734,7 +1734,7 @@ static void dbengine2_statistics_charts(void) { struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes(); size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors + - buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd; + buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pages; #ifdef PDC_USE_JULYL buffers_total_size += buffers.julyl; @@ -1798,6 +1798,7 @@ static void dbengine2_statistics_charts(void) { static RRDDIM *rd_pgc_buffers_epdl = NULL; static RRDDIM *rd_pgc_buffers_deol = NULL; static RRDDIM *rd_pgc_buffers_pd = NULL; + static RRDDIM *rd_pgc_buffers_pages = NULL; #ifdef PDC_USE_JULYL static RRDDIM *rd_pgc_buffers_julyl = NULL; #endif @@ -1824,6 +1825,7 @@ static void dbengine2_statistics_charts(void) { rd_pgc_buffers_workers = rrddim_add(st_pgc_buffers, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_pdc = rrddim_add(st_pgc_buffers, "pdc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_pd = rrddim_add(st_pgc_buffers, "pd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pages = rrddim_add(st_pgc_buffers, "pages", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_xt_io = rrddim_add(st_pgc_buffers, "extent io", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_xt_buf = rrddim_add(st_pgc_buffers, "extent buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_epdl = rrddim_add(st_pgc_buffers, "epdl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); @@ -1841,6 +1843,7 @@ static void dbengine2_statistics_charts(void) { rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers); rrddim_set_by_pointer(st_pgc_buffers, 
rd_pgc_buffers_pdc, (collected_number)buffers.pdc); rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pages, (collected_number)buffers.pages); rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io); rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf); rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl); diff --git a/daemon/main.c b/daemon/main.c index c4217ea1bf..1eff318f1a 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -428,6 +428,25 @@ void netdata_cleanup_and_exit(int ret) { #ifdef ENABLE_DBENGINE if(dbengine_enabled) { + delta_shutdown_time("wait for dbengine collectors to finish"); + + size_t running = 1; + while(running) { + running = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) + running += rrdeng_collectors_running(multidb_ctx[tier]); + + if(running) + sleep_usec(100 * USEC_PER_MS); + } + + delta_shutdown_time("wait for dbengine main cache to finish flushing"); + + while (pgc_hot_and_dirty_entries(main_cache)) { + pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); + sleep_usec(100 * USEC_PER_MS); + } + delta_shutdown_time("stop dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit(multidb_ctx[tier]); |