diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2023-01-23 22:18:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-23 22:18:44 +0200 |
commit | dd0f7ae992a8de282c77dc7745c5090e5d65cc28 (patch) | |
tree | fecf5514eda33c0a96f4d359f30fd07229d12cf7 /daemon | |
parent | c2c3876c519fbc22a60a5d8b753dc6d8e81e0fed (diff) |
DBENGINE v2 - improvements part 7 (#14307)
* run cleanup in workers
* when there is a discrepancy between update every, fix it
* fix the other occurrences of metric update every mismatch
* allow resetting the same timestamp
* validate flushed pages before committing them to disk
* initialize collection with the latest time in mrg
* these should be static functions
* acquire metrics for writing to detect multiple data collections of the same metric
* print the uuid of the metric that is collected twice
* log the discrepancies of completed pages
* 1 second tolerance
* unify validation of pages and related logging across dbengine
* make do_flush_pages() thread safe
* flush pages runs on libuv workers
* added uv events to tp workers
* don't cross datafile spinlock and rwlock
* should be unlock
* prevent the creation of multiple datafiles
* break an infinite replication loop
* do not log the expansion of the replication window due to start streaming
* log all invalid pages with internal checks
* do not shutdown event loop threads
* add information about collected page events, to find the root cause of invalid collected pages
* rewrite of the gap filling to fix the invalid collected pages problem
* handle multiple collections of the same metric gracefully
* added log about main cache page conflicts; fix gap filling once again...
* keep track of the first metric writer
* it should be an internal fatal - it does not harm users
* do not check for future timestamps on collected pages, since we inherit the clock of the children; do not check collected pages validity without internal checks
* prevent negative replication completion percentage
* internal error for the discrepancy of mrg
* better logging of dbengine new metrics collection
* without internal checks it is unused
* prevent pluginsd crash on exit due to calling pthread_cancel() on an exited thread
* renames and atomics everywhere
* if a datafile cannot be acquired for deletion during shutdown, continue - this can happen when there are hot pages in open cache referencing it
* Debug for context load
* rrdcontext uuid debug
* rrddim uuid debug
* rrdeng uuid debug
* Revert "rrdeng uuid debug"
This reverts commit 393da190826a582e7e6cc90771bf91b175826d8b.
* Revert "rrddim uuid debug"
This reverts commit 72150b30408294f141b19afcfb35abd7c34777d8.
* Revert "rrdcontext uuid debug"
This reverts commit 2c3b940dc23f460226e9b2a6861c214e840044d0.
* Revert "Debug for context load"
This reverts commit 0d880fc1589f128524e0b47abd9ff0714283ce3b.
* do not use legacy uuids on multihost dbs
* thread safety for journalfile size
* handle other cases of inconsistent collected pages
* make health thread check if it should be running in key loops
* do not log uuids
Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com>
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/event_loop.c | 15 | ||||
-rw-r--r-- | daemon/event_loop.h | 16 | ||||
-rw-r--r-- | daemon/main.c | 4 | ||||
-rw-r--r-- | daemon/main.h | 1 |
4 files changed, 15 insertions, 21 deletions
diff --git a/daemon/event_loop.c b/daemon/event_loop.c index 86a0220657..1dde811466 100644 --- a/daemon/event_loop.c +++ b/daemon/event_loop.c @@ -13,15 +13,9 @@ void register_libuv_worker_jobs() { registered = true; worker_register("LIBUV"); - worker_register_job_name(UV_EVENT_READ_PAGE_CB, "read page cb"); - worker_register_job_name(UV_EVENT_READ_EXTENT_CB, "read extent cb"); - worker_register_job_name(UV_EVENT_COMMIT_PAGE_CB, "commit cb"); - worker_register_job_name(UV_EVENT_FLUSH_PAGES_CB, "flush cb"); worker_register_job_name(UV_EVENT_PAGE_LOOKUP, "page lookup"); worker_register_job_name(UV_EVENT_PAGE_POPULATION, "populate page"); worker_register_job_name(UV_EVENT_EXT_DECOMPRESSION, "extent decompression"); - worker_register_job_name(UV_EVENT_READ_MMAP_EXTENT, "read extent (mmap)"); - worker_register_job_name(UV_EVENT_EXTENT_PROCESSING, "extent processing"); worker_register_job_name(UV_EVENT_METADATA_STORE, "store host metadata"); worker_register_job_name(UV_EVENT_JOURNAL_INDEX_WAIT, "journal v2 wait"); worker_register_job_name(UV_EVENT_JOURNAL_INDEX, "journal v2 indexing"); @@ -29,12 +23,8 @@ void register_libuv_worker_jobs() { worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); worker_register_job_name(UV_EVENT_EXTENT_CACHE, "extent cache"); worker_register_job_name(UV_EVENT_EXTENT_MMAP, "extent mmap"); - worker_register_job_name(UV_EVENT_PAGE_DISPATCH, "dispatch page list"); - worker_register_job_name(UV_EVENT_FLUSH_CALLBACK, "flush callback"); worker_register_job_name(UV_EVENT_FLUSH_MAIN, "flush main"); - worker_register_job_name(UV_EVENT_FLUSH_OPEN, "flush open"); worker_register_job_name(UV_EVENT_EVICT_MAIN, "evict main"); - worker_register_job_name(UV_EVENT_DELETING_FILE, "delete datafiles"); worker_register_job_name(UV_EVENT_ANALYZE_V2, "analyze journalfile"); worker_register_job_name(UV_EVENT_RETENTION_V2, "calculate retention"); worker_register_job_name(UV_EVENT_RETENTION_UPDATE, "update retention"); @@ -43,6 +33,11 @@ void 
register_libuv_worker_jobs() { worker_register_job_name(UV_EVENT_FLUSHED_TO_OPEN, "flushed to open"); worker_register_job_name(UV_EVENT_PREP_QUERY, "prep query"); worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init"); + worker_register_job_name(UV_EVENT_FLUSH_PAGES, "flush pages"); + worker_register_job_name(UV_EVENT_BUFFERS_CLEANUP, "buffers cleanup"); + worker_register_job_name(UV_EVENT_QUIESCE, "quiesce"); + worker_register_job_name(UV_EVENT_POPULATE_MRG, "populate mrg"); + worker_register_job_name(UV_EVENT_SHUTDOWN, "shutdown"); uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER"); } diff --git a/daemon/event_loop.h b/daemon/event_loop.h index 010daea513..6b72dd4801 100644 --- a/daemon/event_loop.h +++ b/daemon/event_loop.h @@ -5,15 +5,9 @@ enum event_loop_job { UV_EVENT_JOB_NONE = 0, - UV_EVENT_READ_PAGE_CB, - UV_EVENT_READ_EXTENT_CB, - UV_EVENT_COMMIT_PAGE_CB, - UV_EVENT_FLUSH_PAGES_CB, UV_EVENT_EXT_DECOMPRESSION, UV_EVENT_PAGE_LOOKUP, UV_EVENT_PAGE_POPULATION, - UV_EVENT_READ_MMAP_EXTENT, - UV_EVENT_EXTENT_PROCESSING, UV_EVENT_METADATA_STORE, UV_EVENT_JOURNAL_INDEX_WAIT, UV_EVENT_JOURNAL_INDEX, @@ -21,13 +15,8 @@ enum event_loop_job { UV_EVENT_METADATA_CLEANUP, UV_EVENT_EXTENT_CACHE, UV_EVENT_EXTENT_MMAP, - UV_EVENT_FLUSH_CALLBACK, - UV_EVENT_EXTEXT_DISPATCH, UV_EVENT_FLUSH_MAIN, - UV_EVENT_FLUSH_OPEN, UV_EVENT_EVICT_MAIN, - UV_EVENT_PAGE_DISPATCH, - UV_EVENT_DELETING_FILE, UV_EVENT_ANALYZE_V2, UV_EVENT_RETENTION_V2, UV_EVENT_RETENTION_UPDATE, @@ -36,6 +25,11 @@ enum event_loop_job { UV_EVENT_FLUSHED_TO_OPEN, UV_EVENT_PREP_QUERY, UV_EVENT_WORKER_INIT, + UV_EVENT_FLUSH_PAGES, + UV_EVENT_BUFFERS_CLEANUP, + UV_EVENT_QUIESCE, + UV_EVENT_POPULATE_MRG, + UV_EVENT_SHUTDOWN, }; void register_libuv_worker_jobs(); diff --git a/daemon/main.c b/daemon/main.c index e5ab9d13a1..c4217ea1bf 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -72,10 +72,12 @@ SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t *PValue = sth; 
switch(thread_type) { + default: case SERVICE_THREAD_TYPE_NETDATA: sth->netdata_thread = netdata_thread_self(); break; + case SERVICE_THREAD_TYPE_EVENT_LOOP: case SERVICE_THREAD_TYPE_LIBUV: sth->uv_thread = uv_thread_self(); break; @@ -197,10 +199,12 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { sth->cancelled = true; switch(sth->type) { + default: case SERVICE_THREAD_TYPE_NETDATA: netdata_thread_cancel(sth->netdata_thread); break; + case SERVICE_THREAD_TYPE_EVENT_LOOP: case SERVICE_THREAD_TYPE_LIBUV: break; } diff --git a/daemon/main.h b/daemon/main.h index 1d83eb9fa4..8704d60977 100644 --- a/daemon/main.h +++ b/daemon/main.h @@ -48,6 +48,7 @@ typedef enum { typedef enum { SERVICE_THREAD_TYPE_NETDATA, SERVICE_THREAD_TYPE_LIBUV, + SERVICE_THREAD_TYPE_EVENT_LOOP, } SERVICE_THREAD_TYPE; typedef void (*force_quit_t)(void *data); |