summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
author: Costa Tsaousis <costa@netdata.cloud> 2023-01-23 22:18:44 +0200
committer: GitHub <noreply@github.com> 2023-01-23 22:18:44 +0200
commit: dd0f7ae992a8de282c77dc7745c5090e5d65cc28 (patch)
tree: fecf5514eda33c0a96f4d359f30fd07229d12cf7 /daemon
parent: c2c3876c519fbc22a60a5d8b753dc6d8e81e0fed (diff)
DBENGINE v2 - improvements part 7 (#14307)
* run cleanup in workers * when there is a discrepancy between update every, fix it * fix the other occurrences of metric update every mismatch * allow resetting the same timestamp * validate flushed pages before committing them to disk * initialize collection with the latest time in mrg * these should be static functions * acquire metrics for writing to detect multiple data collections of the same metric * print the uuid of the metric that is collected twice * log the discrepancies of completed pages * 1 second tolerance * unify validation of pages and related logging across dbengine * make do_flush_pages() thread safe * flush pages runs on libuv workers * added uv events to tp workers * don't cross datafile spinlock and rwlock * should be unlock * prevent the creation of multiple datafiles * break an infinite replication loop * do not log the expansion of the replication window due to start streaming * log all invalid pages with internal checks * do not shutdown event loop threads * add information about collected page events, to find the root cause of invalid collected pages * rewrite of the gap filling to fix the invalid collected pages problem * handle multiple collections of the same metric gracefully * added log about main cache page conflicts; fix gap filling once again... 
* keep track of the first metric writer * it should be an internal fatal - it does not harm users * do not check for future timestamps on collected pages, since we inherit the clock of the children; do not check collected pages validity without internal checks * prevent negative replication completion percentage * internal error for the discrepancy of mrg * better logging of dbengine new metrics collection * without internal checks it is unused * prevent pluginsd crash on exit due to calling pthread_cancel() on an exited thread * renames and atomics everywhere * if a datafile cannot be acquired for deletion during shutdown, continue - this can happen when there are hot pages in open cache referencing it * Debug for context load * rrdcontext uuid debug * rrddim uuid debug * rrdeng uuid debug * Revert "rrdeng uuid debug" This reverts commit 393da190826a582e7e6cc90771bf91b175826d8b. * Revert "rrddim uuid debug" This reverts commit 72150b30408294f141b19afcfb35abd7c34777d8. * Revert "rrdcontext uuid debug" This reverts commit 2c3b940dc23f460226e9b2a6861c214e840044d0. * Revert "Debug for context load" This reverts commit 0d880fc1589f128524e0b47abd9ff0714283ce3b. * do not use legacy uuids on multihost dbs * thread safety for journalfile size * handle other cases of inconsistent collected pages * make health thread check if it should be running in key loops * do not log uuids Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'daemon')
-rw-r--r-- daemon/event_loop.c 15
-rw-r--r-- daemon/event_loop.h 16
-rw-r--r-- daemon/main.c 4
-rw-r--r-- daemon/main.h 1
4 files changed, 15 insertions, 21 deletions
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
index 86a0220657..1dde811466 100644
--- a/daemon/event_loop.c
+++ b/daemon/event_loop.c
@@ -13,15 +13,9 @@ void register_libuv_worker_jobs() {
registered = true;
worker_register("LIBUV");
- worker_register_job_name(UV_EVENT_READ_PAGE_CB, "read page cb");
- worker_register_job_name(UV_EVENT_READ_EXTENT_CB, "read extent cb");
- worker_register_job_name(UV_EVENT_COMMIT_PAGE_CB, "commit cb");
- worker_register_job_name(UV_EVENT_FLUSH_PAGES_CB, "flush cb");
worker_register_job_name(UV_EVENT_PAGE_LOOKUP, "page lookup");
worker_register_job_name(UV_EVENT_PAGE_POPULATION, "populate page");
worker_register_job_name(UV_EVENT_EXT_DECOMPRESSION, "extent decompression");
- worker_register_job_name(UV_EVENT_READ_MMAP_EXTENT, "read extent (mmap)");
- worker_register_job_name(UV_EVENT_EXTENT_PROCESSING, "extent processing");
worker_register_job_name(UV_EVENT_METADATA_STORE, "store host metadata");
worker_register_job_name(UV_EVENT_JOURNAL_INDEX_WAIT, "journal v2 wait");
worker_register_job_name(UV_EVENT_JOURNAL_INDEX, "journal v2 indexing");
@@ -29,12 +23,8 @@ void register_libuv_worker_jobs() {
worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
worker_register_job_name(UV_EVENT_EXTENT_CACHE, "extent cache");
worker_register_job_name(UV_EVENT_EXTENT_MMAP, "extent mmap");
- worker_register_job_name(UV_EVENT_PAGE_DISPATCH, "dispatch page list");
- worker_register_job_name(UV_EVENT_FLUSH_CALLBACK, "flush callback");
worker_register_job_name(UV_EVENT_FLUSH_MAIN, "flush main");
- worker_register_job_name(UV_EVENT_FLUSH_OPEN, "flush open");
worker_register_job_name(UV_EVENT_EVICT_MAIN, "evict main");
- worker_register_job_name(UV_EVENT_DELETING_FILE, "delete datafiles");
worker_register_job_name(UV_EVENT_ANALYZE_V2, "analyze journalfile");
worker_register_job_name(UV_EVENT_RETENTION_V2, "calculate retention");
worker_register_job_name(UV_EVENT_RETENTION_UPDATE, "update retention");
@@ -43,6 +33,11 @@ void register_libuv_worker_jobs() {
worker_register_job_name(UV_EVENT_FLUSHED_TO_OPEN, "flushed to open");
worker_register_job_name(UV_EVENT_PREP_QUERY, "prep query");
worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init");
+ worker_register_job_name(UV_EVENT_FLUSH_PAGES, "flush pages");
+ worker_register_job_name(UV_EVENT_BUFFERS_CLEANUP, "buffers cleanup");
+ worker_register_job_name(UV_EVENT_QUIESCE, "quiesce");
+ worker_register_job_name(UV_EVENT_POPULATE_MRG, "populate mrg");
+ worker_register_job_name(UV_EVENT_SHUTDOWN, "shutdown");
uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER");
}
diff --git a/daemon/event_loop.h b/daemon/event_loop.h
index 010daea513..6b72dd4801 100644
--- a/daemon/event_loop.h
+++ b/daemon/event_loop.h
@@ -5,15 +5,9 @@
enum event_loop_job {
UV_EVENT_JOB_NONE = 0,
- UV_EVENT_READ_PAGE_CB,
- UV_EVENT_READ_EXTENT_CB,
- UV_EVENT_COMMIT_PAGE_CB,
- UV_EVENT_FLUSH_PAGES_CB,
UV_EVENT_EXT_DECOMPRESSION,
UV_EVENT_PAGE_LOOKUP,
UV_EVENT_PAGE_POPULATION,
- UV_EVENT_READ_MMAP_EXTENT,
- UV_EVENT_EXTENT_PROCESSING,
UV_EVENT_METADATA_STORE,
UV_EVENT_JOURNAL_INDEX_WAIT,
UV_EVENT_JOURNAL_INDEX,
@@ -21,13 +15,8 @@ enum event_loop_job {
UV_EVENT_METADATA_CLEANUP,
UV_EVENT_EXTENT_CACHE,
UV_EVENT_EXTENT_MMAP,
- UV_EVENT_FLUSH_CALLBACK,
- UV_EVENT_EXTEXT_DISPATCH,
UV_EVENT_FLUSH_MAIN,
- UV_EVENT_FLUSH_OPEN,
UV_EVENT_EVICT_MAIN,
- UV_EVENT_PAGE_DISPATCH,
- UV_EVENT_DELETING_FILE,
UV_EVENT_ANALYZE_V2,
UV_EVENT_RETENTION_V2,
UV_EVENT_RETENTION_UPDATE,
@@ -36,6 +25,11 @@ enum event_loop_job {
UV_EVENT_FLUSHED_TO_OPEN,
UV_EVENT_PREP_QUERY,
UV_EVENT_WORKER_INIT,
+ UV_EVENT_FLUSH_PAGES,
+ UV_EVENT_BUFFERS_CLEANUP,
+ UV_EVENT_QUIESCE,
+ UV_EVENT_POPULATE_MRG,
+ UV_EVENT_SHUTDOWN,
};
void register_libuv_worker_jobs();
diff --git a/daemon/main.c b/daemon/main.c
index e5ab9d13a1..c4217ea1bf 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -72,10 +72,12 @@ SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t
*PValue = sth;
switch(thread_type) {
+ default:
case SERVICE_THREAD_TYPE_NETDATA:
sth->netdata_thread = netdata_thread_self();
break;
+ case SERVICE_THREAD_TYPE_EVENT_LOOP:
case SERVICE_THREAD_TYPE_LIBUV:
sth->uv_thread = uv_thread_self();
break;
@@ -197,10 +199,12 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
sth->cancelled = true;
switch(sth->type) {
+ default:
case SERVICE_THREAD_TYPE_NETDATA:
netdata_thread_cancel(sth->netdata_thread);
break;
+ case SERVICE_THREAD_TYPE_EVENT_LOOP:
case SERVICE_THREAD_TYPE_LIBUV:
break;
}
diff --git a/daemon/main.h b/daemon/main.h
index 1d83eb9fa4..8704d60977 100644
--- a/daemon/main.h
+++ b/daemon/main.h
@@ -48,6 +48,7 @@ typedef enum {
typedef enum {
SERVICE_THREAD_TYPE_NETDATA,
SERVICE_THREAD_TYPE_LIBUV,
+ SERVICE_THREAD_TYPE_EVENT_LOOP,
} SERVICE_THREAD_TYPE;
typedef void (*force_quit_t)(void *data);