From ba47e197056601c7db3ce8db0db277d155e8118a Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Tue, 26 May 2020 11:57:56 +0000 Subject: New alarms (exporting and Backend) (#9075) New alarms for exporting and backend. --- health/health.d/exporting.conf | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 health/health.d/exporting.conf (limited to 'health/health.d/exporting.conf') diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf new file mode 100644 index 0000000000..506cb0cf75 --- /dev/null +++ b/health/health.d/exporting.conf @@ -0,0 +1,34 @@ + +template: exporting_last_buffering +families: * + on: exporting_data_size + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful buffering of exporting data + to: dba + +template: exporting_metrics_sent +families: * + on: exporting_data_size + units: % + calc: abs($sent) * 100 / abs($buffered) + every: 10s + warn: $this != 100 + delay: down 5m multiplier 1.5 max 1h + info: percentage of metrics sent to the external database server + to: dba + +template: exporting_metrics_lost +families: * + on: exporting_data_size + units: metrics + calc: abs($lost) + every: 10s + crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0) + delay: down 5m multiplier 1.5 max 1h + info: number of metrics lost due to repeating failures to contact the external database server + to: dba -- cgit v1.2.3