diff options
author | Emmanuel Vasilakis <mrzammler@mm.st> | 2021-04-20 16:24:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-04-20 16:24:41 +0300 |
commit | f5bd20e60ae8a65b9c709fcd28d08c1bea268f2d (patch) | |
tree | 8897d68a69f82d8fd0732aca8c344778a5e62340 /health/health.d/net.conf | |
parent | 0a6a14e323ee3b7a2dc17b2ca5b0cce4a8b2eb5e (diff) |
Provide new attributes in health conf files (#10961)
* read and store new attributes (class, component, type) from health conf files. Replace family variable in info strings
* provide the attributes to jsons
* remove extra semicolon
* populate conf files with new attributes
* added newline
* remove extra defines from health.h
* remove empty line
* remove realloc
* use helper variables for find_and_replace. Adjust position for next strstr
* remove comments
* Add type to mysql.conf and vcsa.conf
* fix formatting
* add parenthesis
* remove extra assignment
* changes to mysql_galera_cluster_state from master
* add type Errors to unbound_request_list_overwritten
* fix indentation for info strings spanning more than one line
* check for null, replace with empty string if true
* add class, component, type to systemdunits.conf
Diffstat (limited to 'health/health.d/net.conf')
-rw-r--r-- | health/health.d/net.conf | 322 |
1 files changed, 182 insertions, 140 deletions
diff --git a/health/health.d/net.conf b/health/health.d/net.conf index 33202421fe..04219e163a 100644 --- a/health/health.d/net.conf +++ b/health/health.d/net.conf @@ -6,16 +6,22 @@ template: interface_speed on: net.net + class: System +component: Network + type: Latency os: * hosts: * families: * calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max) : ( nan ) units: Mbit every: 10s - info: network interface current speed + info: network interface $family current speed template: 1m_received_traffic_overflow on: net.net + class: System +component: Network + type: Workload os: linux hosts: * families: * @@ -25,11 +31,14 @@ every: 10s warn: $this > (($status >= $WARNING) ? (85) : (90)) delay: up 1m down 1m multiplier 1.5 max 1h - info: average inbound utilization for the network interface over the last minute + info: average inbound utilization for the network interface $family over the last minute to: sysadmin template: 1m_sent_traffic_overflow on: net.net + class: System +component: Network + type: Workload os: linux hosts: * families: * @@ -39,7 +48,7 @@ every: 10s warn: $this > (($status >= $WARNING) ? 
(85) : (90)) delay: up 1m down 1m multiplier 1.5 max 1h - info: average outbound utilization for the network interface over the last minute + info: average outbound utilization for the network interface $family over the last minute to: sysadmin # ----------------------------------------------------------------------------- @@ -52,110 +61,134 @@ # it is possible to have expected packet drops on an interface for some network configurations # look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information -template: inbound_packets_dropped - on: net.drops - os: linux - hosts: * -families: !net* * - lookup: sum -10m unaligned absolute of inbound - units: packets - every: 1m - info: number of inbound dropped packets for the network interface in the last 10 minutes - -template: outbound_packets_dropped - on: net.drops - os: linux - hosts: * -families: !net* * - lookup: sum -10m unaligned absolute of outbound - units: packets - every: 1m - info: number of outbound dropped packets for the network interface in the last 10 minutes - -template: inbound_packets_dropped_ratio - on: net.packets - os: linux - hosts: * -families: !net* !wl* * - lookup: sum -10m unaligned absolute of received - calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0)) - units: % - every: 1m - warn: $this >= 2 - delay: up 1m down 1h multiplier 1.5 max 2h - info: ratio of inbound dropped packets for the network interface over the last 10 minutes - to: sysadmin - -template: outbound_packets_dropped_ratio - on: net.packets - os: linux - hosts: * -families: !net* !wl* * - lookup: sum -10m unaligned absolute of sent - calc: (($outbound_packets_dropped != nan AND $this > 1000) ? 
($outbound_packets_dropped * 100 / $this) : (0)) - units: % - every: 1m - warn: $this >= 2 - delay: up 1m down 1h multiplier 1.5 max 2h - info: ratio of outbound dropped packets for the network interface over the last 10 minutes - to: sysadmin - -template: wifi_inbound_packets_dropped_ratio - on: net.packets - os: linux - hosts: * -families: wl* - lookup: sum -10m unaligned absolute of received - calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0)) - units: % - every: 1m - warn: $this >= 10 - delay: up 1m down 1h multiplier 1.5 max 2h - info: ratio of inbound dropped packets for the network interface over the last 10 minutes - to: sysadmin - -template: wifi_outbound_packets_dropped_ratio - on: net.packets - os: linux - hosts: * -families: wl* - lookup: sum -10m unaligned absolute of sent - calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0)) - units: % - every: 1m - warn: $this >= 10 - delay: up 1m down 1h multiplier 1.5 max 2h - info: ratio of outbound dropped packets for the network interface over the last 10 minutes - to: sysadmin + template: inbound_packets_dropped + on: net.drops + class: System +component: Network + type: Errors + os: linux + hosts: * + families: !net* * + lookup: sum -10m unaligned absolute of inbound + units: packets + every: 1m + info: number of inbound dropped packets for the network interface $family in the last 10 minutes + + template: outbound_packets_dropped + on: net.drops + class: System +component: Network + type: Errors + os: linux + hosts: * + families: !net* * + lookup: sum -10m unaligned absolute of outbound + units: packets + every: 1m + info: number of outbound dropped packets for the network interface $family in the last 10 minutes + + template: inbound_packets_dropped_ratio + on: net.packets + class: System +component: Network + type: Errors + os: linux + hosts: * + families: !net* !wl* * + lookup: sum -10m 
unaligned absolute of received + calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0)) + units: % + every: 1m + warn: $this >= 2 + delay: up 1m down 1h multiplier 1.5 max 2h + info: ratio of inbound dropped packets for the network interface $family over the last 10 minutes + to: sysadmin + + template: outbound_packets_dropped_ratio + on: net.packets + class: System +component: Network + type: Errors + os: linux + hosts: * + families: !net* !wl* * + lookup: sum -10m unaligned absolute of sent + calc: (($outbound_packets_dropped != nan AND $this > 1000) ? ($outbound_packets_dropped * 100 / $this) : (0)) + units: % + every: 1m + warn: $this >= 2 + delay: up 1m down 1h multiplier 1.5 max 2h + info: ratio of outbound dropped packets for the network interface $family over the last 10 minutes + to: sysadmin + + template: wifi_inbound_packets_dropped_ratio + on: net.packets + class: System +component: Network + type: Errors + os: linux + hosts: * + families: wl* + lookup: sum -10m unaligned absolute of received + calc: (($inbound_packets_dropped != nan AND $this > 1000) ? ($inbound_packets_dropped * 100 / $this) : (0)) + units: % + every: 1m + warn: $this >= 10 + delay: up 1m down 1h multiplier 1.5 max 2h + info: ratio of inbound dropped packets for the network interface $family over the last 10 minutes + to: sysadmin + + template: wifi_outbound_packets_dropped_ratio + on: net.packets + class: System +component: Network + type: Errors + os: linux + hosts: * + families: wl* + lookup: sum -10m unaligned absolute of sent + calc: (($outbound_packets_dropped != nan AND $this > 1000) ? 
($outbound_packets_dropped * 100 / $this) : (0)) + units: % + every: 1m + warn: $this >= 10 + delay: up 1m down 1h multiplier 1.5 max 2h + info: ratio of outbound dropped packets for the network interface $family over the last 10 minutes + to: sysadmin # ----------------------------------------------------------------------------- # interface errors -template: interface_inbound_errors - on: net.errors - os: freebsd - hosts: * -families: * - lookup: sum -10m unaligned absolute of inbound - units: errors - every: 1m - warn: $this >= 5 - delay: down 1h multiplier 1.5 max 2h - info: number of inbound errors for the network interface in the last 10 minutes - to: sysadmin - -template: interface_outbound_errors - on: net.errors - os: freebsd - hosts: * -families: * - lookup: sum -10m unaligned absolute of outbound - units: errors - every: 1m - warn: $this >= 5 - delay: down 1h multiplier 1.5 max 2h - info: number of outbound errors for the network interface in the last 10 minutes - to: sysadmin + template: interface_inbound_errors + on: net.errors + class: System +component: Network + type: Errors + os: freebsd + hosts: * + families: * + lookup: sum -10m unaligned absolute of inbound + units: errors + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: number of inbound errors for the network interface $family in the last 10 minutes + to: sysadmin + + template: interface_outbound_errors + on: net.errors + class: System +component: Network + type: Errors + os: freebsd + hosts: * + families: * + lookup: sum -10m unaligned absolute of outbound + units: errors + every: 1m + warn: $this >= 5 + delay: down 1h multiplier 1.5 max 2h + info: number of outbound errors for the network interface $family in the last 10 minutes + to: sysadmin # ----------------------------------------------------------------------------- # FIFO errors @@ -165,18 +198,21 @@ families: * # the alarm is checked every 1 minute # and examines the last 10 minutes of data -template: 
10min_fifo_errors - on: net.fifo - os: linux - hosts: * -families: * - lookup: sum -10m unaligned absolute - units: errors - every: 1m - warn: $this > 0 - delay: down 1h multiplier 1.5 max 2h - info: number of FIFO errors for the network interface in the last 10 minutes - to: sysadmin + template: 10min_fifo_errors + on: net.fifo + class: System +component: Network + type: Errors + os: linux + hosts: * + families: * + lookup: sum -10m unaligned absolute + units: errors + every: 1m + warn: $this > 0 + delay: down 1h multiplier 1.5 max 2h + info: number of FIFO errors for the network interface $family in the last 10 minutes + to: sysadmin # ----------------------------------------------------------------------------- # check for packet storms @@ -187,28 +223,34 @@ families: * # we assume the minimum packet storm should at least have # 10000 packets/s, average of the last 10 seconds -template: 1m_received_packets_rate - on: net.packets - os: linux freebsd - hosts: * -families: * - lookup: average -1m unaligned of received - units: packets - every: 10s - info: average number of packets received by the network interface over the last minute - -template: 10s_received_packets_storm - on: net.packets - os: linux freebsd - hosts: * -families: * - lookup: average -10s unaligned of received - calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) - every: 10s - units: % - warn: $this > (($status >= $WARNING)?(200):(5000)) - crit: $this > (($status == $CRITICAL)?(5000):(6000)) - options: no-clear-notification - info: ratio of average number of received packets for the network interface over the last 10 seconds, \ - compared to the rate over the last minute - to: sysadmin + template: 1m_received_packets_rate + on: net.packets + class: System +component: Network + type: Workload + os: linux freebsd + hosts: * + families: * + lookup: average -1m unaligned of received + units: packets + every: 10s + info: average number of packets received by 
the network interface $family over the last minute + + template: 10s_received_packets_storm + on: net.packets + class: System +component: Network + type: Workload + os: linux freebsd + hosts: * + families: * + lookup: average -10s unaligned of received + calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) + every: 10s + units: % + warn: $this > (($status >= $WARNING)?(200):(5000)) + crit: $this > (($status == $CRITICAL)?(5000):(6000)) + options: no-clear-notification + info: ratio of average number of received packets for the network interface $family over the last 10 seconds, \ + compared to the rate over the last minute + to: sysadmin |