summary | refs | log | tree | commit | diff | stats
path: root/health
diff options
context:
space:
mode:
author: Ilya Mashchenko <ilya@netdata.cloud> 2022-12-23 15:10:49 +0200
committer: GitHub <noreply@github.com> 2022-12-23 15:10:49 +0200
commit: 1a326fc1ce9ecbcce5ca4b4b72a3bea8f8da0f3b (patch)
tree: c586b5449a18308d5a065e1f9d946f877a8da75c /health
parent: 2bca08132a436409b59803e0026034dd4ef823f2 (diff)
use specific charts labels instead of family in alarms (#14173)
Diffstat (limited to 'health')
-rw-r--r-- health/health.d/cgroups.conf 8
-rw-r--r-- health/health.d/disks.conf 8
-rw-r--r-- health/health.d/mdstat.conf 4
-rw-r--r-- health/health.d/net.conf 28
-rw-r--r-- health/health.d/zfs.conf 4
5 files changed, 26 insertions, 26 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
index 4bfe38b65a..7bdd6d6b36 100644
--- a/health/health.d/cgroups.conf
+++ b/health/health.d/cgroups.conf
@@ -51,7 +51,7 @@ component: Network
lookup: average -1m unaligned of received
units: packets
every: 10s
- info: average number of packets received by the network interface $family over the last minute
+ info: average number of packets received by the network interface $label:device over the last minute
template: cgroup_10s_received_packets_storm
on: cgroup.net_packets
@@ -66,7 +66,7 @@ component: Network
warn: $this > (($status >= $WARNING)?(200):(5000))
crit: $this > (($status == $CRITICAL)?(5000):(6000))
options: no-clear-notification
- info: ratio of average number of received packets for the network interface $family over the last 10 seconds, \
+ info: ratio of average number of received packets for the network interface $label:device over the last 10 seconds, \
compared to the rate over the last minute
to: sysadmin
@@ -121,7 +121,7 @@ component: Network
lookup: average -1m unaligned of received
units: packets
every: 10s
- info: average number of packets received by the network interface $family over the last minute
+ info: average number of packets received by the network interface $label:device over the last minute
template: k8s_cgroup_10s_received_packets_storm
on: k8s.cgroup.net_packets
@@ -136,6 +136,6 @@ component: Network
warn: $this > (($status >= $WARNING)?(200):(5000))
crit: $this > (($status == $CRITICAL)?(5000):(6000))
options: no-clear-notification
- info: ratio of average number of received packets for the network interface $family over the last 10 seconds, \
+ info: ratio of average number of received packets for the network interface $label:device over the last 10 seconds, \
compared to the rate over the last minute
to: sysadmin
diff --git a/health/health.d/disks.conf b/health/health.d/disks.conf
index 5daff61a14..dc5f3d4f9a 100644
--- a/health/health.d/disks.conf
+++ b/health/health.d/disks.conf
@@ -23,7 +23,7 @@ component: Disk
warn: $this > (($status >= $WARNING ) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk $family space utilization
+ info: disk $label:mount_point space utilization
to: sysadmin
template: disk_inode_usage
@@ -40,7 +40,7 @@ component: Disk
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: up 1m down 15m multiplier 1.5 max 1h
- info: disk $family inode utilization
+ info: disk $label:mount_point inode utilization
to: sysadmin
@@ -147,7 +147,7 @@ component: Disk
every: 1m
warn: $this > 98 * (($status >= $WARNING) ? (0.7) : (1))
delay: down 15m multiplier 1.2 max 1h
- info: average percentage of time $family disk was busy over the last 10 minutes
+ info: average percentage of time $label:device disk was busy over the last 10 minutes
to: silent
@@ -169,5 +169,5 @@ component: Disk
every: 1m
warn: $this > 5000 * (($status >= $WARNING) ? (0.7) : (1))
delay: down 15m multiplier 1.2 max 1h
- info: average backlog size of the $family disk over the last 10 minutes
+ info: average backlog size of the $label:device disk over the last 10 minutes
to: silent
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index cedaa000eb..3594cd043c 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -20,7 +20,7 @@ component: RAID
every: 10s
calc: $down
crit: $this > 0
- info: number of devices in the down state for the $family array. \
+ info: number of devices in the down state for the $label:device $label:raid_level array. \
Any number > 0 indicates that the array is degraded.
to: sysadmin
@@ -35,7 +35,7 @@ component: RAID
every: 60s
warn: $this > 1024
delay: up 30m
- info: number of unsynchronized blocks for the $family array
+ info: number of unsynchronized blocks for the $label:device $label:raid_level array
to: sysadmin
template: mdstat_nonredundant_last_collected
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index 9d5b3b8d35..28aa617b9b 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -15,7 +15,7 @@ component: Network
calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max) : ( nan )
units: Mbit
every: 10s
- info: network interface $family current speed
+ info: network interface $label:device current speed
template: 1m_received_traffic_overflow
on: net.net
@@ -31,7 +31,7 @@ component: Network
every: 10s
warn: $this > (($status >= $WARNING) ? (85) : (90))
delay: up 1m down 1m multiplier 1.5 max 1h
- info: average inbound utilization for the network interface $family over the last minute
+ info: average inbound utilization for the network interface $label:device over the last minute
to: sysadmin
template: 1m_sent_traffic_overflow
@@ -48,7 +48,7 @@ component: Network
every: 10s
warn: $this > (($status >= $WARNING) ? (85) : (90))
delay: up 1m down 1m multiplier 1.5 max 1h
- info: average outbound utilization for the network interface $family over the last minute
+ info: average outbound utilization for the network interface $label:device over the last minute
to: sysadmin
# -----------------------------------------------------------------------------
@@ -72,7 +72,7 @@ component: Network
lookup: sum -10m unaligned absolute of inbound
units: packets
every: 1m
- info: number of inbound dropped packets for the network interface $family in the last 10 minutes
+ info: number of inbound dropped packets for the network interface $label:device in the last 10 minutes
template: outbound_packets_dropped
on: net.drops
@@ -85,7 +85,7 @@ component: Network
lookup: sum -10m unaligned absolute of outbound
units: packets
every: 1m
- info: number of outbound dropped packets for the network interface $family in the last 10 minutes
+ info: number of outbound dropped packets for the network interface $label:device in the last 10 minutes
template: inbound_packets_dropped_ratio
on: net.packets
@@ -101,7 +101,7 @@ component: Network
every: 1m
warn: $this >= 2
delay: up 1m down 1h multiplier 1.5 max 2h
- info: ratio of inbound dropped packets for the network interface $family over the last 10 minutes
+ info: ratio of inbound dropped packets for the network interface $label:device over the last 10 minutes
to: sysadmin
template: outbound_packets_dropped_ratio
@@ -118,7 +118,7 @@ component: Network
every: 1m
warn: $this >= 2
delay: up 1m down 1h multiplier 1.5 max 2h
- info: ratio of outbound dropped packets for the network interface $family over the last 10 minutes
+ info: ratio of outbound dropped packets for the network interface $label:device over the last 10 minutes
to: sysadmin
template: wifi_inbound_packets_dropped_ratio
@@ -135,7 +135,7 @@ component: Network
every: 1m
warn: $this >= 10
delay: up 1m down 1h multiplier 1.5 max 2h
- info: ratio of inbound dropped packets for the network interface $family over the last 10 minutes
+ info: ratio of inbound dropped packets for the network interface $label:device over the last 10 minutes
to: sysadmin
template: wifi_outbound_packets_dropped_ratio
@@ -152,7 +152,7 @@ component: Network
every: 1m
warn: $this >= 10
delay: up 1m down 1h multiplier 1.5 max 2h
- info: ratio of outbound dropped packets for the network interface $family over the last 10 minutes
+ info: ratio of outbound dropped packets for the network interface $label:device over the last 10 minutes
to: sysadmin
# -----------------------------------------------------------------------------
@@ -171,7 +171,7 @@ component: Network
every: 1m
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
- info: number of inbound errors for the network interface $family in the last 10 minutes
+ info: number of inbound errors for the network interface $label:device in the last 10 minutes
to: sysadmin
template: interface_outbound_errors
@@ -187,7 +187,7 @@ component: Network
every: 1m
warn: $this >= 5
delay: down 1h multiplier 1.5 max 2h
- info: number of outbound errors for the network interface $family in the last 10 minutes
+ info: number of outbound errors for the network interface $label:device in the last 10 minutes
to: sysadmin
# -----------------------------------------------------------------------------
@@ -211,7 +211,7 @@ component: Network
every: 1m
warn: $this > 0
delay: down 1h multiplier 1.5 max 2h
- info: number of FIFO errors for the network interface $family in the last 10 minutes
+ info: number of FIFO errors for the network interface $label:device in the last 10 minutes
to: sysadmin
# -----------------------------------------------------------------------------
@@ -234,7 +234,7 @@ component: Network
lookup: average -1m unaligned of received
units: packets
every: 10s
- info: average number of packets received by the network interface $family over the last minute
+ info: average number of packets received by the network interface $label:device over the last minute
template: 10s_received_packets_storm
on: net.packets
@@ -251,6 +251,6 @@ component: Network
warn: $this > (($status >= $WARNING)?(200):(5000))
crit: $this > (($status == $CRITICAL)?(5000):(6000))
options: no-clear-notification
- info: ratio of average number of received packets for the network interface $family over the last 10 seconds, \
+ info: ratio of average number of received packets for the network interface $label:device over the last 10 seconds, \
compared to the rate over the last minute
to: sysadmin
diff --git a/health/health.d/zfs.conf b/health/health.d/zfs.conf
index 785838d477..c53f8032ea 100644
--- a/health/health.d/zfs.conf
+++ b/health/health.d/zfs.conf
@@ -24,7 +24,7 @@ component: File system
every: 10s
warn: $this > 0
delay: down 1m multiplier 1.5 max 1h
- info: ZFS pool $family state is degraded
+ info: ZFS pool $label:pool state is degraded
to: sysadmin
template: zfs_pool_state_crit
@@ -37,5 +37,5 @@ component: File system
every: 10s
crit: $this > 0
delay: down 1m multiplier 1.5 max 1h
- info: ZFS pool $family state is faulted or unavail
+ info: ZFS pool $label:pool state is faulted or unavail
to: sysadmin