summary | refs | log | tree | commit | diff | stats
path: root/daemon
diff options
context:
space:
mode:
author: Markos Fountoulakis <44345837+mfundul@users.noreply.github.com> 2019-12-16 16:14:55 +0200
committer: GitHub <noreply@github.com> 2019-12-16 16:14:55 +0200
commit: 53ab093d84919c743450199a31bca9a13412e451 (patch)
tree: 3aa6b7a811784b7ec565c62a0c715ad484771de6 /daemon
parent: c8c72f18a6a8fd09d3b6284e49525396b24e8395 (diff)
Fix race condition in dbengine (#7533)
Diffstat (limited to 'daemon')
-rw-r--r--  daemon/main.c       12
-rw-r--r--  daemon/unit_test.c  83
-rw-r--r--  daemon/unit_test.h   2
3 files changed, 65 insertions, 32 deletions
diff --git a/daemon/main.c b/daemon/main.c
index ece8feb447..c8cd968fbf 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -340,10 +340,12 @@ int help(int exitcode) {
" -W unittest Run internal unittests and exit.\n\n"
#ifdef ENABLE_DBENGINE
" -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
- " -W stresstest=A,B,C,D,E Run a DB engine stress test for A seconds,\n"
+ " -W stresstest=A,B,C,D,E,F\n"
+ " Run a DB engine stress test for A seconds,\n"
" with B writers and C readers, with a ramp up\n"
" time of D seconds for writers, a page cache\n"
- " size of E MiB, and exit.\n\n"
+ " size of E MiB, an optional disk space limit\n"
+ " of F MiB and exit.\n\n"
#endif
" -W set section option value\n"
" set netdata.conf option from the command line.\n\n"
@@ -956,7 +958,7 @@ int main(int argc, char **argv) {
else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
char *endptr;
unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
- page_cache_mb = 0;
+ page_cache_mb = 0, disk_space_mb = 0;
optarg += strlen(stresstest_string);
test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
@@ -968,8 +970,10 @@ int main(int argc, char **argv) {
ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
if (',' == *endptr)
page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
+ if (',' == *endptr)
+ disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
- page_cache_mb);
+ page_cache_mb, disk_space_mb);
return 0;
}
#endif
diff --git a/daemon/unit_test.c b/daemon/unit_test.c
index 2e59273214..bae2a3418e 100644
--- a/daemon/unit_test.c
+++ b/daemon/unit_test.c
@@ -1723,7 +1723,7 @@ int test_dbengine(void)
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
- debug(D_RRDHOST, "Initializing localhost with hostname 'unittest-dbengine'");
+ fprintf(stderr, "Initializing localhost with hostname 'unittest-dbengine'\n");
host = dbengine_rrdhost_find_or_create("unittest-dbengine");
if (NULL == host)
return 1;
@@ -1915,6 +1915,9 @@ static void generate_dbengine_chart(void *arg)
rrdset_done(st);
thread_info->time_max = time_current;
}
+ for (j = 0; j < DSET_DIMS; ++j) {
+ rrdeng_store_metric_finalize(rd[j]);
+ }
}
void generate_dbengine_dataset(unsigned history_seconds)
@@ -1935,7 +1938,7 @@ void generate_dbengine_dataset(unsigned history_seconds)
default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
error_log_limit_unlimited();
- debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-dataset'");
+ fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'\n");
host = dbengine_rrdhost_find_or_create("dbengine-dataset");
if (NULL == host)
@@ -1986,6 +1989,7 @@ struct dbengine_query_thread {
unsigned history_seconds; /* how far back in the past to go */
volatile long done; /* initialize to 0, set to 1 to stop thread */
unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */
+ uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */
struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */
};
@@ -1995,7 +1999,7 @@ static void query_dbengine_chart(void *arg)
struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg;
const int DSET_CHARTS = thread_info->dset_charts;
const int DSET_DIMS = thread_info->dset_dims;
- time_t time_after, time_before, time_min, time_max, duration;
+ time_t time_after, time_before, time_min, time_approx_min, time_max, duration;
int i, j, update_every = 1;
RRDSET *st;
RRDDIM *rd;
@@ -2015,6 +2019,13 @@ static void query_dbengine_chart(void *arg)
time_min = thread_info->time_present - thread_info->history_seconds + 1;
time_max = thread_info->chart_threads[i]->time_max;
+
+ if (thread_info->delete_old_data) {
+ /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */
+ time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) /
+ (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number));
+ time_min = MAX(time_min, time_approx_min);
+ }
if (!time_max) {
time_before = time_after = time_min;
} else {
@@ -2030,18 +2041,22 @@ static void query_dbengine_chart(void *arg)
expected = unpack_storage_number(pack_storage_number((calculated_number) generatedv, SN_EXISTS));
if (unlikely(rd->state->query_ops.is_finished(&handle))) {
- fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
- CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
- st->name, rd->name, (unsigned long) time_now, expected);
- ++thread_info->errors;
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
+ CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
+ st->name, rd->name, (unsigned long) time_now, expected);
+ ++thread_info->errors;
+ }
break;
}
n = rd->state->query_ops.next_metric(&handle, &time_retrieved);
if (SN_EMPTY_SLOT == n) {
- fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
- CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
- st->name, rd->name, (unsigned long) time_now, expected);
- ++thread_info->errors;
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
+ CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
+ st->name, rd->name, (unsigned long) time_now, expected);
+ ++thread_info->errors;
+ }
break;
}
++thread_info->queried_metrics_nr;
@@ -2049,15 +2064,21 @@ static void query_dbengine_chart(void *arg)
same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0;
if (!same) {
- fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
- CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
- st->name, rd->name, (unsigned long) time_now, expected, value);
- ++thread_info->errors;
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value "
+ CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT
+ ", ### E R R O R ###\n",
+ st->name, rd->name, (unsigned long) time_now, expected, value);
+ ++thread_info->errors;
+ }
}
if (time_retrieved != time_now) {
- fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n",
- st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved);
- ++thread_info->errors;
+ if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+ fprintf(stderr,
+ " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n",
+ st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved);
+ ++thread_info->errors;
+ }
}
}
rd->state->query_ops.finalize(&handle);
@@ -2065,17 +2086,19 @@ static void query_dbengine_chart(void *arg)
}
void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
- unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB)
+ unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB)
{
const unsigned DSET_DIMS = 128;
const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
- const unsigned HISTORY_SECONDS = 3600 * 24 * 365; /* 1 year of history */
+ const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 years of history */
RRDHOST *host = NULL;
struct dbengine_chart_thread **chart_threads;
struct dbengine_query_thread **query_threads;
unsigned i, j;
time_t time_start, time_end;
+ error_log_limit_unlimited();
+
if (!TEST_DURATION_SEC)
TEST_DURATION_SEC = 10;
if (!DSET_CHARTS)
@@ -2087,13 +2110,18 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
default_rrdeng_page_cache_mb = PAGE_CACHE_MB;
- // Worst case for uncompressible data
- default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) /
- (1024 * 1024);
- default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+ if (DISK_SPACE_MB) {
+ fprintf(stderr, "By setting disk space limit data are allowed to be deleted. "
+ "Data validation is turned off for this run.\n");
+ default_rrdeng_disk_quota_mb = DISK_SPACE_MB;
+ } else {
+ // Worst case for uncompressible data
+ default_rrdeng_disk_quota_mb =
+ (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024);
+ default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+ }
- error_log_limit_unlimited();
- debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-stress-test'");
+ fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n");
host = dbengine_rrdhost_find_or_create("dbengine-stress-test");
if (NULL == host)
@@ -2112,7 +2140,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
"%u MiB of page cache.\n",
RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB);
- time_start = now_realtime_sec();
+ time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */
for (i = 0 ; i < DSET_CHARTS ; ++i) {
chart_threads[i]->host = host;
chart_threads[i]->chartname = "random";
@@ -2146,6 +2174,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
for (j = 0 ; j < DSET_CHARTS ; ++j) {
query_threads[i]->chart_threads[j] = chart_threads[j];
}
+ query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0;
assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
}
sleep(TEST_DURATION_SEC);
diff --git a/daemon/unit_test.h b/daemon/unit_test.h
index 230a700858..79d415be04 100644
--- a/daemon/unit_test.h
+++ b/daemon/unit_test.h
@@ -12,7 +12,7 @@ extern int unit_test_buffer(void);
extern int test_dbengine(void);
extern void generate_dbengine_dataset(unsigned history_seconds);
extern void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
- unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB);
+ unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB);
#endif