diff options
author | Markos Fountoulakis <44345837+mfundul@users.noreply.github.com> | 2019-12-16 16:14:55 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-12-16 16:14:55 +0200 |
commit | 53ab093d84919c743450199a31bca9a13412e451 (patch) | |
tree | 3aa6b7a811784b7ec565c62a0c715ad484771de6 /daemon | |
parent | c8c72f18a6a8fd09d3b6284e49525396b24e8395 (diff) |
Fix race condition in dbengine (#7533)
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/main.c | 12 | ||||
-rw-r--r-- | daemon/unit_test.c | 83 | ||||
-rw-r--r-- | daemon/unit_test.h | 2 |
3 files changed, 65 insertions, 32 deletions
diff --git a/daemon/main.c b/daemon/main.c index ece8feb447..c8cd968fbf 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -340,10 +340,12 @@ int help(int exitcode) { " -W unittest Run internal unittests and exit.\n\n" #ifdef ENABLE_DBENGINE " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" - " -W stresstest=A,B,C,D,E Run a DB engine stress test for A seconds,\n" + " -W stresstest=A,B,C,D,E,F\n" + " Run a DB engine stress test for A seconds,\n" " with B writers and C readers, with a ramp up\n" " time of D seconds for writers, a page cache\n" - " size of E MiB, and exit.\n\n" + " size of E MiB, an optional disk space limit\n" + " of F MiB and exit.\n\n" #endif " -W set section option value\n" " set netdata.conf option from the command line.\n\n" @@ -956,7 +958,7 @@ int main(int argc, char **argv) { else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { char *endptr; unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, - page_cache_mb = 0; + page_cache_mb = 0, disk_space_mb = 0; optarg += strlen(stresstest_string); test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); @@ -968,8 +970,10 @@ int main(int argc, char **argv) { ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0); if (',' == *endptr) page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, - page_cache_mb); + page_cache_mb, disk_space_mb); return 0; } #endif diff --git a/daemon/unit_test.c b/daemon/unit_test.c index 2e59273214..bae2a3418e 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -1723,7 +1723,7 @@ int test_dbengine(void) default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; - debug(D_RRDHOST, "Initializing localhost with hostname 'unittest-dbengine'"); + fprintf(stderr, "Initializing localhost with hostname 'unittest-dbengine'\n"); host = dbengine_rrdhost_find_or_create("unittest-dbengine"); if (NULL == host) return 1; @@ -1915,6 +1915,9 @@ static void generate_dbengine_chart(void *arg) rrdset_done(st); thread_info->time_max = time_current; } + for (j = 0; j < DSET_DIMS; ++j) { + rrdeng_store_metric_finalize(rd[j]); + } } void generate_dbengine_dataset(unsigned history_seconds) @@ -1935,7 +1938,7 @@ void generate_dbengine_dataset(unsigned history_seconds) default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; error_log_limit_unlimited(); - debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-dataset'"); + fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'\n"); host = dbengine_rrdhost_find_or_create("dbengine-dataset"); if (NULL == host) @@ -1986,6 +1989,7 @@ struct dbengine_query_thread { unsigned history_seconds; /* how far back in the past to go */ volatile long done; /* initialize to 0, set to 1 to stop thread */ unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */ + uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */ struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */ }; @@ -1995,7 +1999,7 @@ static void query_dbengine_chart(void *arg) struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg; const int DSET_CHARTS = thread_info->dset_charts; const int DSET_DIMS = thread_info->dset_dims; - time_t time_after, time_before, time_min, time_max, duration; + time_t time_after, time_before, time_min, time_approx_min, time_max, duration; int i, j, update_every = 1; RRDSET *st; RRDDIM *rd; @@ -2015,6 +2019,13 @@ static void query_dbengine_chart(void *arg) time_min = thread_info->time_present - thread_info->history_seconds + 1; time_max = thread_info->chart_threads[i]->time_max; + + if (thread_info->delete_old_data) { + /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */ + time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) / + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number)); + time_min = MAX(time_min, time_approx_min); + } if (!time_max) { time_before = time_after = time_min; } else { @@ -2030,18 +2041,22 @@ static void query_dbengine_chart(void *arg) expected = unpack_storage_number(pack_storage_number((calculated_number) generatedv, SN_EXISTS)); if (unlikely(rd->state->query_ops.is_finished(&handle))) { - fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " - CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected); - ++thread_info->errors; + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " + CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n", + st->name, rd->name, (unsigned long) time_now, expected); + ++thread_info->errors; + } break; } n = rd->state->query_ops.next_metric(&handle, &time_retrieved); if (SN_EMPTY_SLOT == n) { - fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " - CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected); - ++thread_info->errors; + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " + CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n", + st->name, rd->name, (unsigned long) time_now, expected); + ++thread_info->errors; + } break; } ++thread_info->queried_metrics_nr; @@ -2049,15 +2064,21 @@ static void query_dbengine_chart(void *arg) same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0; if (!same) { - fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " - CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected, value); - ++thread_info->errors; + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " + CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT + ", ### E R R O R ###\n", + st->name, rd->name, (unsigned long) time_now, expected, value); + ++thread_info->errors; + } } if (time_retrieved != time_now) { - fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved); - ++thread_info->errors; + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, + " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", + st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved); + ++thread_info->errors; + } } } rd->state->query_ops.finalize(&handle); @@ -2065,17 +2086,19 @@ static void query_dbengine_chart(void *arg) } void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, - unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB) + unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB) { const unsigned DSET_DIMS = 128; const uint64_t EXPECTED_COMPRESSION_RATIO = 20; - const unsigned HISTORY_SECONDS = 3600 * 24 * 365; /* 1 year of history */ + const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 years of history */ RRDHOST *host = NULL; struct dbengine_chart_thread **chart_threads; struct dbengine_query_thread **query_threads; unsigned i, j; time_t time_start, time_end; + error_log_limit_unlimited(); + if (!TEST_DURATION_SEC) TEST_DURATION_SEC = 10; if (!DSET_CHARTS) @@ -2087,13 +2110,18 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; default_rrdeng_page_cache_mb = PAGE_CACHE_MB; - // Worst case for uncompressible data - default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / - (1024 * 1024); - default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + if (DISK_SPACE_MB) { + fprintf(stderr, "By setting disk space limit data are allowed to be deleted. " + "Data validation is turned off for this run.\n"); + default_rrdeng_disk_quota_mb = DISK_SPACE_MB; + } else { + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024); + default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + } - error_log_limit_unlimited(); - debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-stress-test'"); + fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n"); host = dbengine_rrdhost_find_or_create("dbengine-stress-test"); if (NULL == host) @@ -2112,7 +2140,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi "%u MiB of page cache.\n", RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB); - time_start = now_realtime_sec(); + time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */ for (i = 0 ; i < DSET_CHARTS ; ++i) { chart_threads[i]->host = host; chart_threads[i]->chartname = "random"; @@ -2146,6 +2174,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi for (j = 0 ; j < DSET_CHARTS ; ++j) { query_threads[i]->chart_threads[j] = chart_threads[j]; } + query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0; assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i])); } sleep(TEST_DURATION_SEC); diff --git a/daemon/unit_test.h b/daemon/unit_test.h index 230a700858..79d415be04 100644 --- a/daemon/unit_test.h +++ b/daemon/unit_test.h @@ -12,7 +12,7 @@ extern int unit_test_buffer(void); extern int test_dbengine(void); extern void generate_dbengine_dataset(unsigned history_seconds); extern void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, - unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB); + unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB); #endif |