summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2023-01-25 01:56:49 +0200
committer GitHub <noreply@github.com> 2023-01-25 01:56:49 +0200
commit 3a430c181e7655a8460b40e9864395694f223e46 (patch)
tree 538c69896374d1a8587d6f6719033c160014e650 /daemon
parent 0c1fbbe591d5b99f747877feb02557354ff621b2 (diff)
DBENGINE v2 - improvements part 8 (#14319)
* cache 100 pages for each size our tiers need * smarter page caching * account the caching structures * dynamic max number of cached pages * make variables const to ensure they are not changed * make sure replication timestamps do not go to the future * replication now sends chart and dimension states atomically; replication receivers ignore chart and dimension states when rbegin is also ignored * make sure all pages are flushed on shutdown * take into account empty points too * when recalculating retention update first_time_s on metrics only when they are bigger * Report the datafile number we use to recalculate retention * Report the datafile number we use to recalculate retention * rotate db at startup * make query plans overlap * Calculate properly first time s * updated event labels * negative page caching fix * Attempt to create missing tables on query failure * Attempt to create missing tables on query failure (part 2) * negative page caching for all gaps, to eliminate jv2 scans * Fix unittest Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'daemon')
-rw-r--r--daemon/event_loop.c63
-rw-r--r--daemon/event_loop.h61
-rw-r--r--daemon/global_statistics.c5
-rw-r--r--daemon/main.c19
4 files changed, 100 insertions, 48 deletions
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
index 1dde811466..cb101d9143 100644
--- a/daemon/event_loop.c
+++ b/daemon/event_loop.c
@@ -13,31 +13,46 @@ void register_libuv_worker_jobs() {
registered = true;
worker_register("LIBUV");
- worker_register_job_name(UV_EVENT_PAGE_LOOKUP, "page lookup");
- worker_register_job_name(UV_EVENT_PAGE_POPULATION, "populate page");
- worker_register_job_name(UV_EVENT_EXT_DECOMPRESSION, "extent decompression");
- worker_register_job_name(UV_EVENT_METADATA_STORE, "store host metadata");
- worker_register_job_name(UV_EVENT_JOURNAL_INDEX_WAIT, "journal v2 wait");
- worker_register_job_name(UV_EVENT_JOURNAL_INDEX, "journal v2 indexing");
- worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
- worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
- worker_register_job_name(UV_EVENT_EXTENT_CACHE, "extent cache");
- worker_register_job_name(UV_EVENT_EXTENT_MMAP, "extent mmap");
- worker_register_job_name(UV_EVENT_FLUSH_MAIN, "flush main");
- worker_register_job_name(UV_EVENT_EVICT_MAIN, "evict main");
- worker_register_job_name(UV_EVENT_ANALYZE_V2, "analyze journalfile");
- worker_register_job_name(UV_EVENT_RETENTION_V2, "calculate retention");
- worker_register_job_name(UV_EVENT_RETENTION_UPDATE, "update retention");
- worker_register_job_name(UV_EVENT_DATAFILE_ACQUIRE, "datafile acquire");
- worker_register_job_name(UV_EVENT_DATAFILE_DELETE, "datafile deletion");
- worker_register_job_name(UV_EVENT_FLUSHED_TO_OPEN, "flushed to open");
- worker_register_job_name(UV_EVENT_PREP_QUERY, "prep query");
+
+ // generic
worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init");
- worker_register_job_name(UV_EVENT_FLUSH_PAGES, "flush pages");
- worker_register_job_name(UV_EVENT_BUFFERS_CLEANUP, "buffers cleanup");
- worker_register_job_name(UV_EVENT_QUIESCE, "quiesce");
- worker_register_job_name(UV_EVENT_POPULATE_MRG, "populate mrg");
- worker_register_job_name(UV_EVENT_SHUTDOWN, "shutdown");
+
+ // query related
+ worker_register_job_name(UV_EVENT_DBENGINE_QUERY, "query");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, "extent cache");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_MMAP, "extent mmap");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, "extent decompression");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, "page lookup");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, "page populate");
+
+ // flushing related
+ worker_register_job_name(UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, "flush main");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_WRITE, "extent write");
+ worker_register_job_name(UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, "flushed to open");
+
+ // datafile full
+ worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, "jv2 index wait");
+ worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX, "jv2 indexing");
+
+ // db rotation related
+ worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, "datafile delete wait");
+ worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE, "datafile deletion");
+ worker_register_job_name(UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, "find rotated metrics");
+ worker_register_job_name(UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, "find remaining retention");
+ worker_register_job_name(UV_EVENT_DBENGINE_POPULATE_MRG, "update retention");
+
+ // other dbengine events
+ worker_register_job_name(UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, "evict main");
+ worker_register_job_name(UV_EVENT_DBENGINE_BUFFERS_CLEANUP, "dbengine buffers cleanup");
+ worker_register_job_name(UV_EVENT_DBENGINE_QUIESCE, "dbengine quiesce");
+ worker_register_job_name(UV_EVENT_DBENGINE_SHUTDOWN, "dbengine shutdown");
+
+ // metadata
+ worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host");
+ worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
+
+ // netdatacli
+ worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER");
}
diff --git a/daemon/event_loop.h b/daemon/event_loop.h
index 6b72dd4801..a539a39cfa 100644
--- a/daemon/event_loop.h
+++ b/daemon/event_loop.h
@@ -5,31 +5,46 @@
enum event_loop_job {
UV_EVENT_JOB_NONE = 0,
- UV_EVENT_EXT_DECOMPRESSION,
- UV_EVENT_PAGE_LOOKUP,
- UV_EVENT_PAGE_POPULATION,
+
+ // generic
+ UV_EVENT_WORKER_INIT,
+
+ // query related
+ UV_EVENT_DBENGINE_QUERY,
+ UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP,
+ UV_EVENT_DBENGINE_EXTENT_MMAP,
+ UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION,
+ UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP,
+ UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION,
+
+ // flushing related
+ UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE,
+ UV_EVENT_DBENGINE_EXTENT_WRITE,
+ UV_EVENT_DBENGINE_FLUSHED_TO_OPEN,
+
+ // datafile full
+ UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT,
+ UV_EVENT_DBENGINE_JOURNAL_INDEX,
+
+ // db rotation related
+ UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT,
+ UV_EVENT_DBENGINE_DATAFILE_DELETE,
+ UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, // find the metrics that are rotated
+ UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, // find their remaining retention
+ UV_EVENT_DBENGINE_POPULATE_MRG, // update mrg
+
+ // other dbengine events
+ UV_EVENT_DBENGINE_EVICT_MAIN_CACHE,
+ UV_EVENT_DBENGINE_BUFFERS_CLEANUP,
+ UV_EVENT_DBENGINE_QUIESCE,
+ UV_EVENT_DBENGINE_SHUTDOWN,
+
+ // metadata
UV_EVENT_METADATA_STORE,
- UV_EVENT_JOURNAL_INDEX_WAIT,
- UV_EVENT_JOURNAL_INDEX,
- UV_EVENT_SCHEDULE_CMD,
UV_EVENT_METADATA_CLEANUP,
- UV_EVENT_EXTENT_CACHE,
- UV_EVENT_EXTENT_MMAP,
- UV_EVENT_FLUSH_MAIN,
- UV_EVENT_EVICT_MAIN,
- UV_EVENT_ANALYZE_V2,
- UV_EVENT_RETENTION_V2,
- UV_EVENT_RETENTION_UPDATE,
- UV_EVENT_DATAFILE_ACQUIRE,
- UV_EVENT_DATAFILE_DELETE,
- UV_EVENT_FLUSHED_TO_OPEN,
- UV_EVENT_PREP_QUERY,
- UV_EVENT_WORKER_INIT,
- UV_EVENT_FLUSH_PAGES,
- UV_EVENT_BUFFERS_CLEANUP,
- UV_EVENT_QUIESCE,
- UV_EVENT_POPULATE_MRG,
- UV_EVENT_SHUTDOWN,
+
+ // netdatacli
+ UV_EVENT_SCHEDULE_CMD,
};
void register_libuv_worker_jobs();
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index 2422b5a3a4..cb89ed01d9 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -1734,7 +1734,7 @@ static void dbengine2_statistics_charts(void) {
struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes();
size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors +
- buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd;
+ buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pages;
#ifdef PDC_USE_JULYL
buffers_total_size += buffers.julyl;
@@ -1798,6 +1798,7 @@ static void dbengine2_statistics_charts(void) {
static RRDDIM *rd_pgc_buffers_epdl = NULL;
static RRDDIM *rd_pgc_buffers_deol = NULL;
static RRDDIM *rd_pgc_buffers_pd = NULL;
+ static RRDDIM *rd_pgc_buffers_pages = NULL;
#ifdef PDC_USE_JULYL
static RRDDIM *rd_pgc_buffers_julyl = NULL;
#endif
@@ -1824,6 +1825,7 @@ static void dbengine2_statistics_charts(void) {
rd_pgc_buffers_workers = rrddim_add(st_pgc_buffers, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_pgc_buffers_pdc = rrddim_add(st_pgc_buffers, "pdc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_pgc_buffers_pd = rrddim_add(st_pgc_buffers, "pd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_pages = rrddim_add(st_pgc_buffers, "pages", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_pgc_buffers_xt_io = rrddim_add(st_pgc_buffers, "extent io", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_pgc_buffers_xt_buf = rrddim_add(st_pgc_buffers, "extent buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_pgc_buffers_epdl = rrddim_add(st_pgc_buffers, "epdl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
@@ -1841,6 +1843,7 @@ static void dbengine2_statistics_charts(void) {
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers);
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)buffers.pdc);
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pages, (collected_number)buffers.pages);
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io);
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf);
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl);
diff --git a/daemon/main.c b/daemon/main.c
index c4217ea1bf..1eff318f1a 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -428,6 +428,25 @@ void netdata_cleanup_and_exit(int ret) {
#ifdef ENABLE_DBENGINE
if(dbengine_enabled) {
+ delta_shutdown_time("wait for dbengine collectors to finish");
+
+ size_t running = 1;
+ while(running) {
+ running = 0;
+ for (size_t tier = 0; tier < storage_tiers; tier++)
+ running += rrdeng_collectors_running(multidb_ctx[tier]);
+
+ if(running)
+ sleep_usec(100 * USEC_PER_MS);
+ }
+
+ delta_shutdown_time("wait for dbengine main cache to finish flushing");
+
+ while (pgc_hot_and_dirty_entries(main_cache)) {
+ pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL);
+ sleep_usec(100 * USEC_PER_MS);
+ }
+
delta_shutdown_time("stop dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_exit(multidb_ctx[tier]);