Make dbengine the default memory mode (#6977)

* Basic functionality for dbengine stress test. * Fix coverity defects * Refactored dbengine stress test to be configurable * Added benchmark results and evaluation in dbengine documentation * Make dbengine the default memory mode
author: Markos Fountoulakis <44345837+mfundul@users.noreply.github.com> 2019-10-03 17:04:51 +0300
committer: GitHub <noreply@github.com> 2019-10-03 17:04:51 +0300
commit: 95119afff48735607643bfe3824ed3727b6edbb0 (patch)
tree: 7d588b0f7131743d58386c100cb9fb2b0d97c187
parent: 06cdca8fdfb5f8af43a368e9afe0e996fb1ea8fd (diff)
8 files changed, 376 insertions, 88 deletions
diff --git a/daemon/main.c b/daemon/main.c
index a599167f78..4189ac7bd6 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -306,7 +306,13 @@ int help(int exitcode) {
             "  -W stacksize=N           Set the stacksize (in bytes).\n\n"
             "  -W debug_flags=N         Set runtime tracing to debug.log.\n\n"
             "  -W unittest              Run internal unittests and exit.\n\n"
+#ifdef ENABLE_DBENGINE
             "  -W createdataset=N       Create a DB engine dataset of N seconds and exit.\n\n"
+            "  -W stresstest=A,B,C,D,E  Run a DB engine stress test for A seconds,\n"
+            "                           with B writers and C readers, with a ramp up\n"
+            "                           time of D seconds for writers, a page cache\n"
+            "                           size of E MiB, and exit.\n\n"
+#endif
             "  -W set section option value\n"
             "                           set netdata.conf option from the command line.\n\n"
             "  -W simple-pattern pattern string\n"
@@ -887,6 +893,7 @@ int main(int argc, char **argv) {
                         char* stacksize_string = "stacksize=";
                         char* debug_flags_string = "debug_flags=";
                         char* createdataset_string = "createdataset=";
+                        char* stresstest_string = "stresstest=";
 
                         if(strcmp(optarg, "unittest") == 0) {
                             if(unit_test_buffer()) return 1;
@@ -905,14 +912,33 @@ int main(int argc, char **argv) {
                             fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
                             return 0;
                         }
+#ifdef ENABLE_DBENGINE
                         else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
                             optarg += strlen(createdataset_string);
-#ifdef ENABLE_DBENGINE
-                            unsigned history_seconds = (unsigned )strtoull(optarg, NULL, 0);
+                            unsigned history_seconds = strtoul(optarg, NULL, 0);
                             generate_dbengine_dataset(history_seconds);
-#endif
                             return 0;
                         }
+                        else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) {
+                            char *endptr;
+                            unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0,
+                            page_cache_mb = 0;
+
+                            optarg += strlen(stresstest_string);
+                            test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0);
+                            if (',' == *endptr)
+                                dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0);
+                            if (',' == *endptr)
+                                query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0);
+                            if (',' == *endptr)
+                                ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0);
+                            if (',' == *endptr)
+                                page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0);
+                            dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds,
+                                                 page_cache_mb);
+                            return 0;
+                        }
+#endif
                         else if(strcmp(optarg, "simple-pattern") == 0) {
                             if(optind + 2 > argc) {
                                 fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n"
diff --git a/daemon/unit_test.c b/daemon/unit_test.c
index 36ccd9f6b7..31718eeeae 100644
--- a/daemon/unit_test.c
+++ b/daemon/unit_test.c
@@ -1688,7 +1688,8 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS
             st[i]->usec_since_last_update = USEC_PER_SEC * update_every;
 
             for (j = 0; j < DIMS; ++j) {
-                next = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c;
+                next = ((collected_number)i * DIMS) * REGION_POINTS[current_region] +
+                       j * REGION_POINTS[current_region] + c;
                 rrddim_set_by_pointer_fake_time(rd[i][j], next, time_now);
             }
             rrdset_done(st[i]);
@@ -1719,13 +1720,14 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
             for (j = 0; j < DIMS; ++j) {
                 rd[i][j]->state->query_ops.init(rd[i][j], &handle, time_now, time_now + QUERY_BATCH * update_every);
                 for (k = 0; k < QUERY_BATCH; ++k) {
-                    last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c + k;
+                    last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] +
+                           j * REGION_POINTS[current_region] + c + k;
                     expected = unpack_storage_number(pack_storage_number((calculated_number)last, SN_EXISTS));
 
                     n = rd[i][j]->state->query_ops.next_metric(&handle, &time_retrieved);
                     value = unpack_storage_number(n);
 
-                    same = (calculated_number_round(value * 10000000.0) == calculated_number_round(expected * 10000000.0)) ? 1 : 0;
+                    same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0;
                     if(!same) {
                         fprintf(stderr, "    DB-engine unittest %s/%s: at %lu secs, expecting value "
                                         CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
@@ -1780,7 +1782,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
                     last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c;
                     expected = unpack_storage_number(pack_storage_number((calculated_number)last, SN_EXISTS));
 
-                    same = (calculated_number_round(value * 10000000.0) == calculated_number_round(expected * 10000000.0)) ? 1 : 0;
+                    same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0;
                     if(!same) {
                         fprintf(stderr, "    DB-engine unittest %s/%s: at %lu secs, expecting value "
                                         CALCULATED_NUMBER_FORMAT ", RRDR found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
@@ -1902,7 +1904,7 @@ int test_dbengine(void)
                     collected_number last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c - point_offset;
                     calculated_number expected = unpack_storage_number(pack_storage_number((calculated_number)last, SN_EXISTS));
 
-                    uint8_t same = (calculated_number_round(value * 10000000.0) == calculated_number_round(expected * 10000000.0)) ? 1 : 0;
+                    uint8_t same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0;
                     if(!same) {
                         fprintf(stderr, "    DB-engine unittest %s/%s: at %lu secs, expecting value "
                                         CALCULATED_NUMBER_FORMAT ", RRDR found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
@@ -1932,20 +1934,27 @@ struct dbengine_chart_thread {
     uv_thread_t thread;
     RRDHOST *host;
     char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */
-    int dset_charts; /* number of charts */
-    int dset_dims; /* dimensions per chart */
-    int chart_i; /* current chart offset */
+    unsigned dset_charts; /* number of charts */
+    unsigned dset_dims; /* dimensions per chart */
+    unsigned chart_i; /* current chart offset */
     time_t time_present; /* current virtual time of the benchmark */
+    volatile time_t time_max; /* latest timestamp of stored values */
     unsigned history_seconds; /* how far back in the past to go */
+
+    volatile long done; /* initialize to 0, set to 1 to stop thread */
+    struct completion charts_initialized;
+    unsigned long errors, stored_metrics_nr; /* statistics */
+
+    RRDSET *st;
+    RRDDIM *rd[]; /* dset_dims elements */
 };
 
-collected_number generate_dbengine_chart_value(struct dbengine_chart_thread *thread_info, int dim_i,
-                                               time_t time_current)
+collected_number generate_dbengine_chart_value(int chart_i, int dim_i, time_t time_current)
 {
     collected_number value;
 
-    value = ((collected_number)time_current) * thread_info->chart_i;
-    value += ((collected_number)time_current) * dim_i;
+    value = ((collected_number)time_current) * (chart_i + 1);
+    value += ((collected_number)time_current) * (dim_i + 1);
     value %= 1024LLU;
 
     return value;
@@ -1956,44 +1965,47 @@ static void generate_dbengine_chart(void *arg)
     struct dbengine_chart_thread *thread_info = (struct dbengine_chart_thread *)arg;
     RRDHOST *host = thread_info->host;
     char *chartname = thread_info->chartname;
-    const int DSET_DIMS = thread_info->dset_dims;
+    const unsigned DSET_DIMS = thread_info->dset_dims;
     unsigned history_seconds = thread_info->history_seconds;
     time_t time_present = thread_info->time_present;
 
-    int j, update_every = 1;
+    unsigned j, update_every = 1;
     RRDSET *st;
     RRDDIM *rd[DSET_DIMS];
     char name[RRD_ID_LENGTH_MAX + 1];
     time_t time_current;
 
     // create the chart
-    snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%d", thread_info->chart_i + 1);
-    st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname, chartname, NULL, 1,
-                       update_every, RRDSET_TYPE_LINE);
+    snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%u", thread_info->chart_i + 1);
+    thread_info->st = st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname,
+                                         chartname, NULL, 1, update_every, RRDSET_TYPE_LINE);
     for (j = 0 ; j < DSET_DIMS ; ++j) {
-        snprintfz(name, RRD_ID_LENGTH_MAX, "%s%d", chartname, j);
+        snprintfz(name, RRD_ID_LENGTH_MAX, "%s%u", chartname, j + 1);
 
-        rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        thread_info->rd[j] = rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
     }
+    complete(&thread_info->charts_initialized);
 
     // feed it with the test data
     time_current = time_present - history_seconds;
     for (j = 0 ; j < DSET_DIMS ; ++j) {
         rd[j]->last_collected_time.tv_sec =
-        st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current;
+        st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current - update_every;
         rd[j]->last_collected_time.tv_usec =
         st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0;
     }
-    for( ; time_current < time_present ; ++time_current) {
-        st->usec_since_last_update = USEC_PER_SEC;
+    for( ; !thread_info->done && time_current < time_present ; time_current += update_every) {
+        st->usec_since_last_update = USEC_PER_SEC * update_every;
 
         for (j = 0; j < DSET_DIMS; ++j) {
             collected_number value;
 
-            value = generate_dbengine_chart_value(thread_info, j, time_current);
+            value = generate_dbengine_chart_value(thread_info->chart_i, j, time_current);
             rrddim_set_by_pointer_fake_time(rd[j], value, time_current);
+            ++thread_info->stored_metrics_nr;
         }
         rrdset_done(st);
+        thread_info->time_max = time_current;
     }
 }
 
@@ -2003,7 +2015,7 @@ void generate_dbengine_dataset(unsigned history_seconds)
     const int DSET_DIMS = 128;
     const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
     RRDHOST *host = NULL;
-    struct dbengine_chart_thread thread_info[DSET_CHARTS];
+    struct dbengine_chart_thread **thread_info;
     int i;
     time_t time_present;
 
@@ -2021,25 +2033,259 @@ void generate_dbengine_dataset(unsigned history_seconds)
     if (NULL == host)
         return;
 
+    thread_info = mallocz(sizeof(*thread_info) * DSET_CHARTS);
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        thread_info[i] = mallocz(sizeof(*thread_info[i]) + sizeof(RRDDIM *) * DSET_DIMS);
+    }
     fprintf(stderr, "\nRunning DB-engine workload generator\n");
 
     time_present = now_realtime_sec();
     for (i = 0 ; i < DSET_CHARTS ; ++i) {
-        thread_info[i].host = host;
-        thread_info[i].chartname = "random";
-        thread_info[i].dset_charts = DSET_CHARTS;
-        thread_info[i].chart_i = i;
-        thread_info[i].dset_dims = DSET_DIMS;
-        thread_info[i].history_seconds = history_seconds;
-        thread_info[i].time_present = time_present;
-        assert(0 == uv_thread_create(&thread_info[i].thread, generate_dbengine_chart, &thread_info[i]));
+        thread_info[i]->host = host;
+        thread_info[i]->chartname = "random";
+        thread_info[i]->dset_charts = DSET_CHARTS;
+        thread_info[i]->chart_i = i;
+        thread_info[i]->dset_dims = DSET_DIMS;
+        thread_info[i]->history_seconds = history_seconds;
+        thread_info[i]->time_present = time_present;
+        thread_info[i]->time_max = 0;
+        thread_info[i]->done = 0;
+        init_completion(&thread_info[i]->charts_initialized);
+        assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i]));
+        wait_for_completion(&thread_info[i]->charts_initialized);
+        destroy_completion(&thread_info[i]->charts_initialized);
     }
     for (i = 0 ; i < DSET_CHARTS ; ++i) {
-        assert(0 == uv_thread_join(&thread_info[i].thread));
+        assert(0 == uv_thread_join(&thread_info[i]->thread));
     }
 
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        freez(thread_info[i]);
+    }
+    freez(thread_info);
     rrd_wrlock();
     rrdhost_free(host);
     rrd_unlock();
 }
+
+struct dbengine_query_thread {
+    uv_thread_t thread;
+    RRDHOST *host;
+    char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */
+    unsigned dset_charts; /* number of charts */
+    unsigned dset_dims; /* dimensions per chart */
+    time_t time_present; /* current virtual time of the benchmark */
+    unsigned history_seconds; /* how far back in the past to go */
+    volatile long done; /* initialize to 0, set to 1 to stop thread */
+    unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */
+
+    struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */
+};
+
+static void query_dbengine_chart(void *arg)
+{
+    struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg;
+    const int DSET_CHARTS = thread_info->dset_charts;
+    const int DSET_DIMS = thread_info->dset_dims;
+    time_t time_after, time_before, time_min, time_max, duration;
+    int i, j, update_every = 1;
+    RRDSET *st;
+    RRDDIM *rd;
+    uint8_t same;
+    time_t time_now, time_retrieved;
+    collected_number generatedv;
+    calculated_number value, expected;
+    storage_number n;
+    struct rrddim_query_handle handle;
+
+    do {
+        // pick a chart and dimension
+        i = random() % DSET_CHARTS;
+        st = thread_info->chart_threads[i]->st;
+        j = random() % DSET_DIMS;
+        rd = thread_info->chart_threads[i]->rd[j];
+
+        time_min = thread_info->time_present - thread_info->history_seconds + 1;
+        time_max = thread_info->chart_threads[i]->time_max;
+        if (!time_max) {
+            time_before = time_after = time_min;
+        } else {
+            time_after = time_min + random() % (MAX(time_max - time_min, 1));
+            duration = random() % 3600;
+            time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */
+        }
+
+        rd->state->query_ops.init(rd, &handle, time_after, time_before);
+        ++thread_info->queries_nr;
+        for (time_now = time_after ; time_now <= time_before ; time_now += update_every) {
+            generatedv = generate_dbengine_chart_value(i, j, time_now);
+            expected = unpack_storage_number(pack_storage_number((calculated_number) generatedv, SN_EXISTS));
+
+            if (unlikely(rd->state->query_ops.is_finished(&handle))) {
+                fprintf(stderr, "    DB-engine stresstest %s/%s: at %lu secs, expecting value "
+                                CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
+                        st->name, rd->name, (unsigned long) time_now, expected);
+                ++thread_info->errors;
+                break;
+            }
+            n = rd->state->query_ops.next_metric(&handle, &time_retrieved);
+            if (SN_EMPTY_SLOT == n) {
+                fprintf(stderr, "    DB-engine stresstest %s/%s: at %lu secs, expecting value "
+                                CALCULATED_NUMBER_FORMAT ", found data gap, ### E R R O R ###\n",
+                        st->name, rd->name, (unsigned long) time_now, expected);
+                ++thread_info->errors;
+                break;
+            }
+            ++thread_info->queried_metrics_nr;
+            value = unpack_storage_number(n);
+
+            same = (calculated_number_round(value) == calculated_number_round(expected)) ? 1 : 0;
+            if (!same) {
+                fprintf(stderr, "    DB-engine stresstest %s/%s: at %lu secs, expecting value "
+                                CALCULATED_NUMBER_FORMAT ", found " CALCULATED_NUMBER_FORMAT ", ### E R R O R ###\n",
+                        st->name, rd->name, (unsigned long) time_now, expected, value);
+                ++thread_info->errors;
+            }
+            if (time_retrieved != time_now) {
+                fprintf(stderr, "    DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n",
+                        st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved);
+                ++thread_info->errors;
+            }
+        }
+        rd->state->query_ops.finalize(&handle);
+    } while(!thread_info->done);
+}
+
+void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                          unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB)
+{
+    const unsigned DSET_DIMS = 128;
+    const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
+    const unsigned HISTORY_SECONDS = 3600 * 24 * 365; /* 1 year of history */
+    RRDHOST *host = NULL;
+    struct dbengine_chart_thread **chart_threads;
+    struct dbengine_query_thread **query_threads;
+    unsigned i, j;
+    time_t time_start, time_end;
+
+    if (!TEST_DURATION_SEC)
+        TEST_DURATION_SEC = 10;
+    if (!DSET_CHARTS)
+        DSET_CHARTS = 1;
+    if (!QUERY_THREADS)
+        QUERY_THREADS = 1;
+    if (PAGE_CACHE_MB < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
+        PAGE_CACHE_MB = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
+
+    default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+    default_rrdeng_page_cache_mb = PAGE_CACHE_MB;
+    // Worst case for uncompressible data
+    default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) /
+                                   (1024 * 1024);
+    default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+
+    error_log_limit_unlimited();
+    debug(D_RRDHOST, "Initializing localhost with hostname 'dbengine-stress-test'");
+
+    host = dbengine_rrdhost_find_or_create("dbengine-stress-test");
+    if (NULL == host)
+        return;
+
+    chart_threads = mallocz(sizeof(*chart_threads) * DSET_CHARTS);
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i] = mallocz(sizeof(*chart_threads[i]) + sizeof(RRDDIM *) * DSET_DIMS);
+    }
+    query_threads = mallocz(sizeof(*query_threads) * QUERY_THREADS);
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i] = mallocz(sizeof(*query_threads[i]) + sizeof(struct dbengine_chart_thread *) * DSET_CHARTS);
+    }
+    fprintf(stderr, "\nRunning DB-engine stress test, %u seconds writers ramp-up time,\n"
+                    "%u seconds of concurrent readers and writers, %u writer threads, %u reader threads,\n"
+                    "%u MiB of page cache.\n",
+                    RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB);
+
+    time_start = now_realtime_sec();
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i]->host = host;
+        chart_threads[i]->chartname = "random";
+        chart_threads[i]->dset_charts = DSET_CHARTS;
+        chart_threads[i]->chart_i = i;
+        chart_threads[i]->dset_dims = DSET_DIMS;
+        chart_threads[i]->history_seconds = HISTORY_SECONDS;
+        chart_threads[i]->time_present = time_start;
+        chart_threads[i]->time_max = 0;
+        chart_threads[i]->done = 0;
+        chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0;
+        init_completion(&chart_threads[i]->charts_initialized);
+        assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i]));
+    }
+    /* barrier so that subsequent queries can access valid chart data */
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        wait_for_completion(&chart_threads[i]->charts_initialized);
+        destroy_completion(&chart_threads[i]->charts_initialized);
+    }
+    sleep(RAMP_UP_SECONDS);
+    /* at this point data have already begun being written to the database */
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i]->host = host;
+        query_threads[i]->chartname = "random";
+        query_threads[i]->dset_charts = DSET_CHARTS;
+        query_threads[i]->dset_dims = DSET_DIMS;
+        query_threads[i]->history_seconds = HISTORY_SECONDS;
+        query_threads[i]->time_present = time_start;
+        query_threads[i]->done = 0;
+        query_threads[i]->errors = query_threads[i]->queries_nr = query_threads[i]->queried_metrics_nr = 0;
+        for (j = 0 ; j < DSET_CHARTS ; ++j) {
+            query_threads[i]->chart_threads[j] = chart_threads[j];
+        }
+        assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
+    }
+    sleep(TEST_DURATION_SEC);
+    /* stop workload */
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i]->done = 1;
+    }
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i]->done = 1;
+    }
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        assert(0 == uv_thread_join(&chart_threads[i]->thread));
+    }
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        assert(0 == uv_thread_join(&query_threads[i]->thread));
+    }
+    time_end = now_realtime_sec();
+    fprintf(stderr, "\nDB-engine stress test finished in %ld seconds.\n", time_end - time_start);
+    unsigned long stored_metrics_nr = 0;
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        stored_metrics_nr += chart_threads[i]->stored_metrics_nr;
+    }
+    unsigned long queries_nr = 0, queried_metrics_nr = 0;
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        queries_nr += query_threads[i]->queries_nr;
+        queried_metrics_nr += query_threads[i]->queried_metrics_nr;
+    }
+    fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n",
+            DSET_CHARTS * DSET_DIMS, stored_metrics_nr * sizeof(storage_number) / (1024 * 1024), DSET_CHARTS);
+    fprintf(stderr, "Metrics were being generated per 1 emulated second and time was accelerated.\n");
+    fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS);
+    fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n"
+                    "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n");
+    fprintf(stderr, "Performance is %lu written data points/sec and %lu read data points/sec.\n",
+            stored_metrics_nr / (time_end - time_start), queried_metrics_nr / (time_end - time_start));
+
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        freez(chart_threads[i]);
+    }
+    freez(chart_threads);
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        freez(query_threads[i]);
+    }
+    freez(query_threads);
+    rrdeng_exit(host->rrdeng_ctx);
+    rrd_wrlock();
+    rrdhost_delete_charts(host);
+    rrd_unlock();
+}
+
 #endif
diff --git a/daemon/unit_test.h b/daemon/unit_test.h
index fd3e801705..230a700858 100644
--- a/daemon/unit_test.h
+++ b/daemon/unit_test.h
@@ -11,6 +11,9 @@ extern int unit_test_buffer(void);
 #ifdef ENABLE_DBENGINE
 extern int test_dbengine(void);
 extern void generate_dbengine_dataset(unsigned history_seconds);
+extern void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                                 unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB);
+
 #endif
 
 #endif /* NETDATA_UNIT_TEST_H */
diff --git a/database/README.md b/database/README.md
index 1efdd9a94b..143615a0e8 100644
--- a/database/README.md
+++ b/database/README.md
@@ -25,7 +25,7 @@ Currently Netdata supports 6 memory modes:
 
 1.  `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and supports [KSM](#ksm).
 
-2.  `save`, (the default) data are only in RAM while Netdata runs and are saved to / loaded from disk on Netdata
+2.  `save`, data are only in RAM while Netdata runs and are saved to / loaded from disk on Netdata
     restart. It also uses `mmap()` and supports [KSM](#ksm).
 
 3.  `map`, data are in memory mapped files. This works like the swap. Keep in mind though, this will have a constant
@@ -39,11 +39,12 @@ Currently Netdata supports 6 memory modes:
 5.  `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the fallback for all
     others except `none`.
 
-6.  `dbengine`, data are in database files. The [Database Engine](engine/) works like a traditional database. There is
-    some amount of RAM dedicated to data caching and indexing and the rest of the data reside compressed on disk. The
-    number of history entries is not fixed in this case, but depends on the configured disk space and the effective
-    compression ratio of the data stored. This is the **only mode** that supports changing the data collection update
-    frequency (`update_every`) **without losing** the previously stored metrics. For more details see [here](engine/).
+6.  `dbengine`, (the default) data are in database files. The [Database Engine](engine/) works like a traditional
+    database. There is some amount of RAM dedicated to data caching and indexing and the rest of the data reside
+    compressed on disk. The number of history entries is not fixed in this case, but depends on the configured disk
+    space and the effective compression ratio of the data stored. This is the **only mode** that supports changing the
+    data collection update frequency (`update_every`) **without losing** the previously stored metrics. For more details
+    see [here](engine/).
 
 You can select the memory mode by editing `netdata.conf` and setting:
 
@@ -63,7 +64,7 @@ Embedded devices usually have very limited RAM resources available.
 There are 2 settings for you to tweak:
 
 1.  `update every`, which controls the data collection frequency
-2.  `history`, which controls the size of the database in RAM
+2.  `history`, which controls the size of the database in RAM (except for `memory mode = dbengine`)
 
 By default `update every = 1` and `history = 3600`. This gives you an hour of data with per second updates.
 
diff --git a/database/engine/README.md b/database/engine/README.md
index 12c22a92c2..e824aa3a27 100644
--- a/database/engine/README.md
+++ b/database/engine/README.md
@@ -141,4 +141,55 @@ kern.maxfiles=65536
 
 You can apply the settings by running `sysctl -p` or by rebooting.
 
+## Evaluation
+
+We have evaluated the performance of the `dbengine` API that the Netdata daemon uses internally. This is **not** the
+web API of Netdata. Our benchmarks ran on a **single** `dbengine` instance; a Netdata master server can run several
+such instances. We used a server with an AMD Ryzen Threadripper 2950X 16-Core Processor and 2 disk drives: a
+Seagate Constellation ES.3 2TB magnetic HDD and a SAMSUNG MZQLB960HAJR-00007 960GB NAND Flash SSD.
+
+For our workload, we defined 32 charts with 128 metrics each, giving us a total of 4096 metrics. We defined 1 worker
+thread per chart (32 threads) that generates new data points with a data generation interval of 1 second. The time axis
+of the time-series is emulated and accelerated so that the worker threads can generate as many data points as possible
+without delays. 
+
+We also defined 32 worker threads that perform queries on random metrics with semi-random time ranges. The
+starting time of the query is randomly selected between the beginning of the time-series and the time of the latest data
+point. The ending time is randomly selected between 1 second and 1 hour after the starting time. The pseudo-random
+numbers are generated with a uniform distribution.
+
+The data are written to the database at the same time as they are read from it. This is a concurrent read/write mixed
+workload with a duration of 60 seconds. The faster `dbengine` runs, the bigger the dataset size becomes since more
+data points will be generated. We set a page cache size of 64 MiB for the two disk-bound scenarios. This way, the dataset
+size of the metric data is much bigger than the RAM that is being used for caching so as to trigger I/O requests most
+of the time. In our final scenario, we set the page cache size to 16 GiB. That way, the dataset fits in the page cache
+so as to avoid all disk bottlenecks.
+
+The reported numbers are the following:
+
+| device | page cache | dataset | reads/sec | writes/sec |
+| :---: | :---: | ---: | ---: | ---: |
+| HDD | 64 MiB | 4.1 GiB | 813K | 18.0M |
+| SSD | 64 MiB | 9.8 GiB | 1.7M | 43.0M |
+| N/A | 16 GiB | 6.8 GiB | 118.2M | 30.2M |
+
+where "reads/sec" is the number of metric data points being read from the database via its API per second and
+"writes/sec" is the number of metric data points being written to the database per second. 
+
+Notice that the HDD numbers are pretty high and not much slower than the SSD numbers. This is thanks to the database
+engine design being optimized for rotating media. In the database engine disk I/O requests are:
+
+-   asynchronous to mask the high I/O latency of HDDs.
+-   mostly large to reduce the amount of HDD seeking time.
+-   mostly sequential to reduce the amount of HDD seeking time.
+-   compressed to reduce the amount of required throughput.
+
+As a result, the HDD is not thousands of times slower than the SSD, as it typically would be for other workloads.
+
+An interesting observation is that the CPU-bound run (16 GiB page cache) generates less data than the SSD run
+(6.8 GiB vs 9.8 GiB). The reason is that the 32 reader threads in the SSD scenario are more frequently blocked by I/O
+and generate a read load of 1.7M/sec, whereas in the CPU-bound scenario the read load is 70 times higher at 118M/sec.
+Consequently, the reader threads interfere significantly with the writer threads and slow them down. This outcome is
+possible because the interference costs the writers more throughput than writing to the SSD does.
+
 [![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdatabase%2Fengine%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>)
diff --git a/database/engine/rrdengine.c b/database/engine/rrdengine.c
index 7b57a4194a..896d71f169 100644
--- a/database/engine/rrdengine.c
+++ b/database/engine/rrdengine.c
@@ -815,47 +815,6 @@ error_after_loop_init:
     complete(&ctx->rrdengine_completion);
 }
 
-
-#define NR_PAGES (256)
-static void basic_functional_test(struct rrdengine_instance *ctx)
-{
-    int i, j, failed_validations;
-    uuid_t uuid[NR_PAGES];
-    void *buf;
-    struct rrdeng_page_descr *handle[NR_PAGES];
-    char uuid_str[UUID_STR_LEN];
-    char backup[NR_PAGES][UUID_STR_LEN * 100]; /* backup storage for page data verification */
-
-    for (i = 0 ; i < NR_PAGES ; ++i) {
-        uuid_generate(uuid[i]);
-        uuid_unparse_lower(uuid[i], uuid_str);
-//      fprintf(stderr, "Generated uuid[%d]=%s\n", i, uuid_str);
-        buf = rrdeng_create_page(ctx, &uuid[i], &handle[i]);
-        /* Each page contains 10 times its own UUID stringified */
-        for (j = 0 ; j < 100 ; ++j) {
-            strcpy(buf + UUID_STR_LEN * j, uuid_str);
-            strcpy(backup[i] + UUID_STR_LEN * j, uuid_str);
-        }
-        rrdeng_commit_page(ctx, handle[i], (Word_t)i);
-    }
-    fprintf(stderr, "\n********** CREATED %d METRIC PAGES ***********\n\n", NR_PAGES);
-    failed_validations = 0;
-    for (i = 0 ; i < NR_PAGES ; ++i) {
-        buf = rrdeng_get_latest_page(ctx, &uuid[i], (void **)&handle[i]);
-        if (NULL == buf) {
-            ++failed_validations;
-            fprintf(stderr, "Page %d was LOST.\n", i);
-        }
-        if (memcmp(backup[i], buf, UUID_STR_LEN * 100)) {
-            ++failed_validations;
-            fprintf(stderr, "Page %d data comparison with backup FAILED validation.\n", i);
-        }
-        rrdeng_put_page(ctx, handle[i]);
-    }
-    fprintf(stderr, "\n********** CORRECTLY VALIDATED %d/%d METRIC PAGES ***********\n\n",
-            NR_PAGES - failed_validations, NR_PAGES);
-
-}
 /* C entry point for development purposes
  * make "LDFLAGS=-errdengine_main"
  */
@@ -868,8 +827,6 @@ void rrdengine_main(void)
     if (ret) {
         exit(ret);
     }
-    basic_functional_test(ctx);
-
     rrdeng_exit(ctx);
     fprintf(stderr, "Hello world!");
     exit(0);
diff --git a/database/engine/rrdenginelib.c b/database/engine/rrdenginelib.c
index 96504b275f..1a04dc2a47 100644
--- a/database/engine/rrdenginelib.c
+++ b/database/engine/rrdenginelib.c
@@ -8,7 +8,7 @@ void print_page_cache_descr(struct rrdeng_page_descr *descr)
 {
     struct page_cache_descr *pg_cache_descr = descr->pg_cache_descr;
     char uuid_str[UUID_STR_LEN];
-    char str[BUFSIZE];
+    char str[BUFSIZE + 1];
     int pos = 0;
 
     uuid_unparse_lower(*descr->id, uuid_str);
@@ -31,7 +31,7 @@ void print_page_cache_descr(struct rrdeng_page_descr *descr)
 void print_page_descr(struct rrdeng_page_descr *descr)
 {
     char uuid_str[UUID_STR_LEN];
-    char str[BUFSIZE];
+    char str[BUFSIZE + 1];
     int pos = 0;
 
     uuid_unparse_lower(*descr->id, uuid_str);
diff --git a/database/rrd.c b/database/rrd.c
index 31ad3f07e1..dcab65189e 100644
--- a/database/rrd.c
+++ b/database/rrd.c
@@ -15,7 +15,11 @@ int rrd_delete_unupdated_dimensions = 0;
 
 int default_rrd_update_every = UPDATE_EVERY;
 int default_rrd_history_entries = RRD_DEFAULT_HISTORY_ENTRIES;
+#ifdef ENABLE_DBENGINE
+RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+#else
 RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
+#endif
 int gap_when_lost_iterations_above = 1;
author	Markos Fountoulakis <44345837+mfundul@users.noreply.github.com>	2019-10-03 17:04:51 +0300
committer	GitHub <noreply@github.com>	2019-10-03 17:04:51 +0300
commit	95119afff48735607643bfe3824ed3727b6edbb0 (patch)
tree	7d588b0f7131743d58386c100cb9fb2b0d97c187
parent	06cdca8fdfb5f8af43a368e9afe0e996fb1ea8fd (diff)