summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Fotis Voutsas <fotis@netdata.cloud> 2023-07-18 11:00:23 +0300
committer GitHub <noreply@github.com> 2023-07-18 11:00:23 +0300
commit 25586826d42107952ead3348dbce0c0223adf61d (patch)
tree d6af5389beeed5e459b463e2258ce699159bbd7c
parent 489c3f5c66be6d3d63c3975bdbf6add861141998 (diff)
Bugfix on alerts generation for yamls (#15420)
-rw-r--r-- collectors/cgroups.plugin/multi_metadata.yaml | 9
-rw-r--r-- collectors/charts.d.plugin/apcupsd/metadata.yaml | 4
-rw-r--r-- collectors/charts.d.plugin/nut/metadata.yaml | 4
-rw-r--r-- collectors/ebpf.plugin/multi_metadata.yaml | 9
-rw-r--r-- collectors/freebsd.plugin/multi_metadata.yaml | 67
-rw-r--r-- collectors/freeipmi.plugin/multi_metadata.yaml | 1
-rw-r--r-- collectors/ioping.plugin/metadata.yaml | 6
-rw-r--r-- collectors/macos.plugin/multi_metadata.yaml | 34
-rw-r--r-- collectors/proc.plugin/multi_metadata.yaml | 127
-rw-r--r-- collectors/python.d.plugin/adaptec_raid/metadata.yaml | 5
-rw-r--r-- collectors/python.d.plugin/anomalies/metadata.yaml | 5
-rw-r--r-- collectors/python.d.plugin/beanstalk/metadata.yaml | 1
-rw-r--r-- collectors/python.d.plugin/bind_rndc/metadata.yaml | 6
-rw-r--r-- collectors/python.d.plugin/boinc/metadata.yaml | 5
-rw-r--r-- collectors/python.d.plugin/ceph/metadata.yaml | 6
-rw-r--r-- collectors/python.d.plugin/gearman/metadata.yaml | 6
-rw-r--r-- collectors/python.d.plugin/haproxy/metadata.yaml | 5
-rw-r--r-- collectors/python.d.plugin/ipfs/metadata.yaml | 6
-rw-r--r-- collectors/python.d.plugin/megacli/metadata.yaml | 8
-rw-r--r-- collectors/python.d.plugin/memcached/metadata.yaml | 7
-rw-r--r-- collectors/python.d.plugin/retroshare/metadata.yaml | 6
-rw-r--r-- collectors/python.d.plugin/riakkv/metadata.yaml | 9
-rw-r--r-- collectors/timex.plugin/metadata.yaml | 8
23 files changed, 303 insertions, 41 deletions
diff --git a/collectors/cgroups.plugin/multi_metadata.yaml b/collectors/cgroups.plugin/multi_metadata.yaml
index 8a586127eb..b67749e8f2 100644
--- a/collectors/cgroups.plugin/multi_metadata.yaml
+++ b/collectors/cgroups.plugin/multi_metadata.yaml
@@ -616,20 +616,23 @@ modules:
metric: cgroup.net_packets
info: average number of packets received by the network interface ${label:device}
over the last minute
- os: "linux"
- name: cgroup_10s_received_packets_storm
link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf
metric: cgroup.net_packets
info: ratio of average number of received packets for the network interface
${label:device} over the last 10 seconds, compared to the rate over the
last minute
- os: "linux"
- name: k8s_cgroup_1m_received_packets_rate
link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf
metric: k8s.cgroup.net_packets
info: average number of packets received by the network interface ${label:device}
over the last minute
- os: "linux"
+ - name: k8s_cgroup_10s_received_packets_storm
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf
+ metric: k8s.cgroup.net_packets
+ info: ratio of average number of received packets for the network interface
+ ${label:device} over the last 10 seconds, compared to the rate over the
+ last minute
metrics:
folding:
title: Metrics
diff --git a/collectors/charts.d.plugin/apcupsd/metadata.yaml b/collectors/charts.d.plugin/apcupsd/metadata.yaml
index 1661620a21..a0df692518 100644
--- a/collectors/charts.d.plugin/apcupsd/metadata.yaml
+++ b/collectors/charts.d.plugin/apcupsd/metadata.yaml
@@ -63,6 +63,10 @@ alerts:
metric: apcupsd.load
info: average UPS load over the last 10 minutes
os: "*"
+ - name: apcupsd_last_collected_secs
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.load
+ info: number of seconds since the last successful data collection
metrics:
folding:
title: Metrics
diff --git a/collectors/charts.d.plugin/nut/metadata.yaml b/collectors/charts.d.plugin/nut/metadata.yaml
index 994c683f18..3991f67063 100644
--- a/collectors/charts.d.plugin/nut/metadata.yaml
+++ b/collectors/charts.d.plugin/nut/metadata.yaml
@@ -63,6 +63,10 @@ alerts:
metric: nut.load
info: average UPS load over the last 10 minutes
os: "*"
+ - name: nut_last_collected_secs
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf
+ metric: nut.load
+ info: number of seconds since the last successful data collection
metrics:
folding:
title: Metrics
diff --git a/collectors/ebpf.plugin/multi_metadata.yaml b/collectors/ebpf.plugin/multi_metadata.yaml
index ca4e51df12..cec161aff7 100644
--- a/collectors/ebpf.plugin/multi_metadata.yaml
+++ b/collectors/ebpf.plugin/multi_metadata.yaml
@@ -705,7 +705,14 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: sync_freq
+ link: |
+ https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf
+ metric: mem.sync
+ info: number of sync() system calls. Every call causes all pending modifications
+ to filesystem metadata and cached file data to be written to the underlying
+ filesystems.
metrics:
folding:
title: Metrics
diff --git a/collectors/freebsd.plugin/multi_metadata.yaml b/collectors/freebsd.plugin/multi_metadata.yaml
index a761d18b64..bd9ad964b2 100644
--- a/collectors/freebsd.plugin/multi_metadata.yaml
+++ b/collectors/freebsd.plugin/multi_metadata.yaml
@@ -70,6 +70,11 @@ modules:
metric: system.load
info: system five-minute load average
os: "linux"
+ - name: load_average_1
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
+ metric: system.load
+ info: system one-minute load average
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -143,7 +148,11 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: active_processes
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf
+ metric: system.active_processes
+ info: system process IDs (PID) space utilization
metrics:
folding:
title: Metrics
@@ -245,6 +254,11 @@ modules:
metric: system.cpu
info: average CPU steal time over the last 20 minutes
os: "linux"
+ - name: 10min_cpu_usage
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
+ metric: system.cpu
+ info: average CPU utilization over the last 10 minutes (excluding nice)
+ os: "freebsd"
metrics:
folding:
title: Metrics
@@ -778,7 +792,12 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: used_swap
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
+ metric: system.swap
+ info: swap memory utilization
+ os: "linux freebsd"
metrics:
folding:
title: Metrics
@@ -868,6 +887,12 @@ modules:
info: percentage of estimated amount of RAM available for userspace processes,
without causing swapping
os: "linux"
+ - name: ram_available
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
+ metric: mem.available
+ info: percentage of estimated amount of RAM available for userspace processes,
+ without causing swapping
+ os: "freebsd"
metrics:
folding:
title: Metrics
@@ -1111,6 +1136,11 @@ modules:
metric: system.ipc_semaphores
info: IPC semaphore utilization
os: "linux"
+ - name: semaphore_arrays_used
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
+ metric: system.ipc_semaphore_arrays
+ info: IPC semaphore arrays utilization
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -1437,6 +1467,12 @@ modules:
or net.core.netdev_budget_usecs with work remaining over the last minute
(this can be a cause for dropped packets)
os: "linux"
+ - name: 10min_netisr_backlog_exceeded
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
+ metric: system.softnet_stat
+ info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen
+ (this can be a cause for dropped packets)
+ os: "freebsd"
metrics:
folding:
title: Metrics
@@ -1668,7 +1704,12 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: tcp_connections
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf
+ metric: ipv4.tcpsock
+ info: IPv4 TCP connections utilization
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -1758,6 +1799,13 @@ modules:
metric: ipv4.tcphandshake
info: average number of received TCP RESETS over the last minute
os: "linux freebsd"
+ - name: 10s_ipv4_tcp_resets_received
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
+ metric: ipv4.tcphandshake
+ info: average number of received TCP RESETS over the last 10 seconds. This
+ can be an indication that a service this host needs has crashed. Netdata
+ will not send a clear notification for this alarm.
+ os: "linux freebsd"
metrics:
folding:
title: Metrics
@@ -1893,6 +1941,11 @@ modules:
metric: ipv4.udperrors
info: average number of UDP receive buffer errors over the last minute
os: "linux freebsd"
+ - name: 1m_ipv4_udp_send_buffer_errors
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
+ metric: ipv4.udperrors
+ info: average number of UDP send buffer errors over the last minute
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -2553,6 +2606,13 @@ modules:
info: average number of packets received by the network interface ${label:device}
over the last minute
os: "linux freebsd"
+ - name: 10s_received_packets_storm
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
+ metric: net.packets
+ info: ratio of average number of received packets for the network interface
+ ${label:device} over the last 10 seconds, compared to the rate over the
+ last minute
+ os: "linux freebsd"
- name: interface_inbound_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.errors
@@ -2809,7 +2869,6 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
metric: zfs.memory_ops
info: number of times ZFS had to limit the ARC growth in the last 10 minutes
- os: "*"
metrics:
folding:
title: Metrics
diff --git a/collectors/freeipmi.plugin/multi_metadata.yaml b/collectors/freeipmi.plugin/multi_metadata.yaml
index 36eb7be237..473c8b2cb4 100644
--- a/collectors/freeipmi.plugin/multi_metadata.yaml
+++ b/collectors/freeipmi.plugin/multi_metadata.yaml
@@ -131,7 +131,6 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/ipmi.conf
metric: ipmi.sensor_state
info: IPMI sensor ${label:sensor} (${label:component}) state
- os: "linux"
metrics:
folding:
title: Metrics
diff --git a/collectors/ioping.plugin/metadata.yaml b/collectors/ioping.plugin/metadata.yaml
index f21543fbe3..11c835d67f 100644
--- a/collectors/ioping.plugin/metadata.yaml
+++ b/collectors/ioping.plugin/metadata.yaml
@@ -52,7 +52,11 @@ setup:
troubleshooting:
problems:
list: []
-alerts: []
+alerts:
+ - name: ioping_disk_latency
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/ioping.conf
+ metric: ioping.latency
+ info: average I/O latency over the last 10 seconds
metrics:
folding:
title: Metrics
diff --git a/collectors/macos.plugin/multi_metadata.yaml b/collectors/macos.plugin/multi_metadata.yaml
index 0608c41ad8..a853777e2f 100644
--- a/collectors/macos.plugin/multi_metadata.yaml
+++ b/collectors/macos.plugin/multi_metadata.yaml
@@ -71,6 +71,11 @@ modules:
metric: system.cpu
info: average CPU steal time over the last 20 minutes
os: "linux"
+ - name: 10min_cpu_usage
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
+ metric: system.cpu
+ info: average CPU utilization over the last 10 minutes (excluding nice)
+ os: "freebsd"
- name: ram_in_use
link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
metric: system.ram
@@ -210,6 +215,16 @@ modules:
metric: system.load
info: system five-minute load average
os: "linux"
+ - name: load_average_1
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
+ metric: system.load
+ info: system one-minute load average
+ os: "linux"
+ - name: used_swap
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
+ metric: system.swap
+ info: swap memory utilization
+ os: "linux freebsd"
- name: 1m_ipv4_tcp_resets_sent
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
metric: ipv4.tcphandshake
@@ -227,11 +242,23 @@ modules:
metric: ipv4.tcphandshake
info: average number of received TCP RESETS over the last minute
os: "linux freebsd"
+ - name: 10s_ipv4_tcp_resets_received
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
+ metric: ipv4.tcphandshake
+ info: average number of received TCP RESETS over the last 10 seconds. This
+ can be an indication that a service this host needs has crashed. Netdata
+ will not send a clear notification for this alarm.
+ os: "linux freebsd"
- name: 1m_ipv4_udp_receive_buffer_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
metric: ipv4.udperrors
info: average number of UDP receive buffer errors over the last minute
os: "linux freebsd"
+ - name: 1m_ipv4_udp_send_buffer_errors
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
+ metric: ipv4.udperrors
+ info: average number of UDP send buffer errors over the last minute
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -629,6 +656,13 @@ modules:
info: average number of packets received by the network interface ${label:device}
over the last minute
os: "linux freebsd"
+ - name: 10s_received_packets_storm
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
+ metric: net.packets
+ info: ratio of average number of received packets for the network interface
+ ${label:device} over the last 10 seconds, compared to the rate over the
+ last minute
+ os: "linux freebsd"
- name: interface_inbound_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.errors
diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/multi_metadata.yaml
index 61437dee47..3ca89969cf 100644
--- a/collectors/proc.plugin/multi_metadata.yaml
+++ b/collectors/proc.plugin/multi_metadata.yaml
@@ -71,6 +71,11 @@ modules:
metric: system.cpu
info: average CPU steal time over the last 20 minutes
os: "linux"
+ - name: 10min_cpu_usage
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
+ metric: system.cpu
+ info: average CPU utilization over the last 10 minutes (excluding nice)
+ os: "freebsd"
metrics:
folding:
title: Metrics
@@ -221,7 +226,12 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: lowest_entropy
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf
+ metric: system.entropy
+ info: minimum number of entries in the random numbers pool in the last 5 minutes
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -659,6 +669,15 @@ modules:
metric: system.load
info: system five-minute load average
os: "linux"
+ - name: load_average_1
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
+ metric: system.load
+ info: system one-minute load average
+ os: "linux"
+ - name: active_processes
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf
+ metric: system.active_processes
+ info: system process IDs (PID) space utilization
metrics:
folding:
title: Metrics
@@ -986,6 +1005,12 @@ modules:
or net.core.netdev_budget_usecs with work remaining over the last minute
(this can be a cause for dropped packets)
os: "linux"
+ - name: 10min_netisr_backlog_exceeded
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
+ metric: system.softnet_stat
+ info: average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen
+ (this can be a cause for dropped packets)
+ os: "freebsd"
metrics:
folding:
title: Metrics
@@ -1092,6 +1117,22 @@ modules:
info: percentage of estimated amount of RAM available for userspace processes,
without causing swapping
os: "linux"
+ - name: ram_available
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
+ metric: mem.available
+ info: percentage of estimated amount of RAM available for userspace processes,
+ without causing swapping
+ os: "freebsd"
+ - name: used_swap
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
+ metric: system.swap
+ info: swap memory utilization
+ os: "linux freebsd"
+ - name: 1hour_memory_hw_corrupted
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
+ metric: mem.hwcorrupt
+ info: amount of memory corrupted due to a hardware failure
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -1677,6 +1718,11 @@ modules:
metric: system.ipc_semaphores
info: IPC semaphore utilization
os: "linux"
+ - name: semaphore_arrays_used
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
+ metric: system.ipc_semaphore_arrays
+ info: IPC semaphore arrays utilization
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -1779,19 +1825,28 @@ modules:
problems:
list: []
alerts:
+ - name: 10min_disk_backlog
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
+ metric: disk.backlog
+ info: average backlog size of the ${label:device} disk over the last 10 minutes
+ os: "linux"
- name: 10min_disk_utilization
link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
metric: disk.util
info: average percentage of time ${label:device} disk was busy over the last
10 minutes
os: "linux freebsd"
+ - name: bcache_cache_dirty
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
+ metric: disk.bcache_cache_alloc
+ info: percentage of cache space used for dirty data and metadata (this usually
+ means your SSD cache is too small)
- name: bcache_cache_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
metric: disk.bcache_cache_read_races
info: number of times data was read from the cache, the bucket was reused
and invalidated in the last 10 minutes (when this occurs the data is reread
from the backing device)
- os: "freebsd"
metrics:
folding:
title: Metrics
@@ -2051,19 +2106,20 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
metric: md.disks
info: number of seconds since the last successful data collection
- os: "*"
- name: mdstat_disks
link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
metric: md.disks
info: number of devices in the down state for the ${label:device} ${label:raid_level}
array. Any number > 0 indicates that the array is degraded.
- os: "*"
- name: mdstat_mismatch_cnt
link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
metric: md.mismatch_cnt
info: number of unsynchronized blocks for the ${label:device} ${label:raid_level}
array
- os: "*"
+ - name: mdstat_nonredundant_last_collected
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
+ metric: md.nonredundant
+ info: number of seconds since the last successful data collection
metrics:
folding:
title: Metrics
@@ -2231,6 +2287,13 @@ modules:
info: average number of packets received by the network interface ${label:device}
over the last minute
os: "linux freebsd"
+ - name: 10s_received_packets_storm
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
+ metric: net.packets
+ info: ratio of average number of received packets for the network interface
+ ${label:device} over the last 10 seconds, compared to the rate over the
+ last minute
+ os: "linux freebsd"
- name: interface_inbound_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.errors
@@ -2708,6 +2771,11 @@ modules:
info: average number of dropped packets in the TCP accept queue over the last
minute
os: "linux"
+ - name: tcp_connections
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf
+ metric: ipv4.tcpsock
+ info: IPv4 TCP connections utilization
+ os: "linux"
- name: 1m_ipv4_tcp_resets_sent
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
metric: ipv4.tcphandshake
@@ -2725,11 +2793,23 @@ modules:
metric: ipv4.tcphandshake
info: average number of received TCP RESETS over the last minute
os: "linux freebsd"
+ - name: 10s_ipv4_tcp_resets_received
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
+ metric: ipv4.tcphandshake
+ info: average number of received TCP RESETS over the last 10 seconds. This
+ can be an indication that a service this host needs has crashed. Netdata
+ will not send a clear notification for this alarm.
+ os: "linux freebsd"
- name: 1m_ipv4_udp_receive_buffer_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
metric: ipv4.udperrors
info: average number of UDP receive buffer errors over the last minute
os: "linux freebsd"
+ - name: 1m_ipv4_udp_send_buffer_errors
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
+ metric: ipv4.udperrors
+ info: average number of UDP send buffer errors over the last minute
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -3264,7 +3344,17 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: tcp_orphans
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_orphans.conf
+ metric: ipv4.sockstat_tcp_sockets
+ info: orphan IPv4 TCP sockets utilization
+ os: "linux"
+ - name: tcp_memory
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_mem.conf
+ metric: ipv4.sockstat_tcp_mem
+ info: TCP memory utilization
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -3902,7 +3992,12 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: netfilter_conntrack_full
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/netfilter.conf
+ metric: netfilter.conntrack_sockets
+ info: netfilter connection tracker table size utilization
+ os: "linux"
metrics:
folding:
title: Metrics
@@ -4106,7 +4201,10 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
metric: zfspool.state
info: ZFS pool ${label:pool} state is degraded
- os: "*"
+ - name: zfs_pool_state_crit
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
+ metric: zfspool.state
+ info: ZFS pool ${label:pool} state is faulted or unavail
metrics:
folding:
title: Metrics
@@ -4191,7 +4289,6 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
metric: zfs.memory_ops
info: number of times ZFS had to limit the ARC growth in the last 10 minutes
- os: "*"
metrics:
folding:
title: Metrics
@@ -4490,6 +4587,11 @@ modules:
metric: btrfs.device_errors
info: number of encountered BTRFS corruption errors
os: "*"
+ - name: btrfs_device_generation_errors
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
+ metric: btrfs.device_errors
+ info: number of encountered BTRFS generation errors
+ os: "*"
metrics:
folding:
title: Metrics
@@ -4632,7 +4734,12 @@ modules:
troubleshooting:
problems:
list: []
- alerts: []
+ alerts:
+ - name: linux_power_supply_capacity
+ link: |
+ https://github.com/netdata/netdata/blob/master/health/health.d/linux_power_supply.conf
+ metric: powersupply.capacity
+ info: percentage of remaining power supply capacity
metrics:
folding:
title: Metrics
diff --git a/collectors/python.d.plugin/adaptec_raid/metadata.yaml b/collectors/python.d.plugin/adaptec_raid/metadata.yaml
index 22dcc1e007..9e2a7de7c7 100644
--- a/collectors/python.d.plugin/adaptec_raid/metadata.yaml
+++ b/collectors/python.d.plugin/adaptec_raid/metadata.yaml
@@ -57,7 +57,10 @@ alerts:
link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf
metric: adaptec_raid.ld_status
info: logical device status is failed or degraded
- os: "linux"
+ - name: adaptec_raid_pd_state
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf
+ metric: adaptec_raid.pd_state
+ info: physical device state is not online
metrics:
folding:
title: Metrics
diff --git a/collectors/python.d.plugin/anomalies/metadata.yaml b/collectors/python.d.plugin/anomalies/metadata.yaml
index 6d643ceb49..415159d966 100644
--- a/collectors/python.d.plugin/anomalies/metadata.yaml
+++ b/collectors/python.d.plugin/anomalies/metadata.yaml
@@ -57,7 +57,10 @@ alerts:
link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf
metric: anomalies.probability
info: average anomaly probability over the last 2 minutes
- os: "linux"
+ - name: anomalies_anomaly_flags
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/anomalies.conf
+ metric: anomalies.anomaly
+ info: number of anomalies in the last 2 minutes
metrics:
folding:
title: Metrics
diff --git a/collectors/python.d.plugin/beanstalk/metadata.yaml b/collectors/python.d.plugin/beanstalk/metadata.yaml
index 1d04a1649e..d5b927fac6 100644
--- a/collectors/python.d.plugin/beanstalk/metadata.yaml
+++ b/collectors/python.d.plugin/beanstalk/metadata.yaml
@@ -59,7 +59,6 @@ alerts:
info: number of buried jobs across all tubes. You need to manually kick them so
they can be processed. Presence of buried jobs in a tube does not affect new
jobs.
- os: "freebsd"
metrics:
folding:
title: Metrics
diff --git a/collectors/python.d.plugin/bind_rndc/metadata.yaml b/collectors/python.d.plugin/bind_rndc/metadata.yaml
index 2677e13068..dfc8964a8b 100644
--- a/collectors/python.d.plugin/bind_rndc/metadata.yaml
+++ b/collectors/python.d.plugin/bind_rndc/metadata.yaml
@@ -52,7 +52,11 @@ setup:
troubleshooting:
problems:
list: []
-alerts: []
+alerts:
+ - name: bind_rndc_stats_file_size
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/bind_rndc.conf
+ metric: bind_rndc.stats_size
+ info: BIND statistics-file size
metrics:
folding:
title: Metrics
diff --git a/collectors/python.d.plugin/boinc/metadata.yaml b/collectors/python.d.plugin/boinc/metadata.yaml
index 1a7487f47c..da59dfd0fd 100644
--- a/collectors/python.d.plugin/boinc/metadata.yaml
+++ b/collectors/python.d.plugin/boinc/metadata.yaml
@@ -58,6 +58,11 @@ alerts:
metric: boinc.tasks
info: average number of total tasks over the last 10 minutes
os: "*"
+ - name: boinc_active_tasks
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf
+ metric: boinc.tasks
+ info: average number of active tasks over the last 10 minutes
+ os: "*"
- name: boinc_compute_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf