summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: thiagoftsm <thiagoftsm@gmail.com> 2020-05-26 11:57:56 +0000
committer: GitHub <noreply@github.com> 2020-05-26 11:57:56 +0000
commit: ba47e197056601c7db3ce8db0db277d155e8118a (patch)
tree: 2fac52f2b06f0647820f722acdacf9ef6296fba3
parent: e92d2ce7a155a33ee6b0acfd928f357251c02c69 (diff)
New alarms (exporting and Backend) (#9075)
New alarms for exporting and backend.
-rw-r--r-- exporting/send_internal_metrics.c | 10
-rw-r--r-- health/Makefile.am | 1
-rw-r--r-- health/health.d/backend.conf | 11
-rw-r--r-- health/health.d/exporting.conf | 34
4 files changed, 51 insertions, 5 deletions
diff --git a/exporting/send_internal_metrics.c b/exporting/send_internal_metrics.c
index e4111a587b..defb8d047d 100644
--- a/exporting/send_internal_metrics.c
+++ b/exporting/send_internal_metrics.c
@@ -15,7 +15,7 @@ void create_main_rusage_chart(RRDSET **st_rusage, RRDDIM **rd_user, RRDDIM **rd_
return;
*st_rusage = rrdset_create_localhost(
- "netdata", "exporting_main_thread_cpu", NULL, "exporting", NULL, "Netdata Main Exporting Thread CPU Usage",
+ "netdata", "exporting_main_thread_cpu", NULL, "exporting", "exporting_cpu_usage", "Netdata Main Exporting Thread CPU Usage",
"milliseconds/s", "exporting", NULL, 130600, localhost->rrd_update_every, RRDSET_TYPE_STACKED);
*rd_user = rrddim_add(*st_rusage, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
@@ -67,7 +67,7 @@ void send_internal_metrics(struct instance *instance)
netdata_fix_chart_id(id);
stats->st_metrics = rrdset_create_localhost(
- "netdata", id, NULL, buffer_tostring(family), NULL, "Netdata Buffered Metrics", "metrics", "exporting", NULL,
+ "netdata", id, NULL, buffer_tostring(family), "exporting_buffer", "Netdata Buffered Metrics", "metrics", "exporting", NULL,
130610, instance->config.update_every, RRDSET_TYPE_LINE);
stats->rd_buffered_metrics = rrddim_add(stats->st_metrics, "buffered", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
@@ -80,7 +80,7 @@ void send_internal_metrics(struct instance *instance)
netdata_fix_chart_id(id);
stats->st_bytes = rrdset_create_localhost(
- "netdata", id, NULL, buffer_tostring(family), NULL, "Netdata Exporting Data Size", "KiB", "exporting", NULL,
+ "netdata", id, NULL, buffer_tostring(family), "exporting_data_size", "Netdata Exporting Data Size", "KiB", "exporting", NULL,
130620, instance->config.update_every, RRDSET_TYPE_AREA);
stats->rd_buffered_bytes = rrddim_add(stats->st_bytes, "buffered", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE);
@@ -94,7 +94,7 @@ void send_internal_metrics(struct instance *instance)
netdata_fix_chart_id(id);
stats->st_ops = rrdset_create_localhost(
- "netdata", id, NULL, buffer_tostring(family), NULL, "Netdata Exporting Operations", "operations", "exporting",
+ "netdata", id, NULL, buffer_tostring(family), "exporting_operations", "Netdata Exporting Operations", "operations", "exporting",
NULL, 130630, instance->config.update_every, RRDSET_TYPE_LINE);
stats->rd_transmission_successes = rrddim_add(stats->st_ops, "write", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
@@ -109,7 +109,7 @@ void send_internal_metrics(struct instance *instance)
netdata_fix_chart_id(id);
stats->st_rusage = rrdset_create_localhost(
- "netdata", id, NULL, buffer_tostring(family), NULL, "Netdata Exporting Instance Thread CPU Usage",
+ "netdata", id, NULL, buffer_tostring(family), "exporting_instance", "Netdata Exporting Instance Thread CPU Usage",
"milliseconds/s", "exporting", NULL, 130640, instance->config.update_every, RRDSET_TYPE_STACKED);
stats->rd_user = rrddim_add(stats->st_rusage, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL);
diff --git a/health/Makefile.am b/health/Makefile.am
index c9d2451bd0..a69836879c 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -45,6 +45,7 @@ dist_healthconfig_DATA = \
health.d/dockerd.conf \
health.d/elasticsearch.conf \
health.d/entropy.conf \
+ health.d/exporting.conf \
health.d/fping.conf \
health.d/ioping.conf \
health.d/fronius.conf \
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf
index 7af100d8f4..e51b8aa5f7 100644
--- a/health/health.d/backend.conf
+++ b/health/health.d/backend.conf
@@ -1,3 +1,13 @@
+# Alert that backends subsystem will be disabled soon
+ alarm: backend_metrics_eol
+ on: netdata.backend_metrics
+ units: boolean
+ calc: $now - $last_collected_t
+ every: 1m
+ warn: $this > 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: The backends subsystem is deprecated and will be removed soon. Migrate your configuration to exporting.conf.
+ to: sysadmin
# make sure we are sending data to backend
@@ -32,6 +42,7 @@
info: number of metrics lost due to repeating failures to contact the backend server
to: dba
+
# this chart has been removed from netdata
# alarm: backend_slow
# on: netdata.backend_latency
diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf
new file mode 100644
index 0000000000..506cb0cf75
--- /dev/null
+++ b/health/health.d/exporting.conf
@@ -0,0 +1,34 @@
+
+template: exporting_last_buffering
+families: *
+ on: exporting_data_size
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful buffering of exporting data
+ to: dba
+
+template: exporting_metrics_sent
+families: *
+ on: exporting_data_size
+ units: %
+ calc: abs($sent) * 100 / abs($buffered)
+ every: 10s
+ warn: $this != 100
+ delay: down 5m multiplier 1.5 max 1h
+ info: percentage of metrics sent to the external database server
+ to: dba
+
+template: exporting_metrics_lost
+families: *
+ on: exporting_data_size
+ units: metrics
+ calc: abs($lost)
+ every: 10s
+ crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0)
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of metrics lost due to repeating failures to contact the external database server
+ to: dba