summaryrefslogtreecommitdiffstats
path: root/health
diff options
context:
space:
mode:
authorEmmanuel Vasilakis <mrzammler@mm.st>2021-06-14 13:56:23 +0300
committerGitHub <noreply@github.com>2021-06-14 13:56:23 +0300
commitf6ec79cfb8e10b421655df5af1336c91cf41ded4 (patch)
tree07e8443c6170a3288c5fff7f35707afdeda75407 /health
parent59af90b08c705a66bdca7551b43257781db47711 (diff)
Swap class and type attributes in stock alarm configurations (#11240)
* swap type and class * edit REFERENCE.md
Diffstat (limited to 'health')
-rw-r--r--health/REFERENCE.md55
-rw-r--r--health/health.d/adaptec_raid.conf8
-rw-r--r--health/health.d/anomalies.conf8
-rw-r--r--health/health.d/apcupsd.conf12
-rw-r--r--health/health.d/backend.conf12
-rw-r--r--health/health.d/bcache.conf8
-rw-r--r--health/health.d/beanstalkd.conf4
-rw-r--r--health/health.d/bind_rndc.conf4
-rw-r--r--health/health.d/boinc.conf16
-rw-r--r--health/health.d/btrfs.conf16
-rw-r--r--health/health.d/ceph.conf4
-rw-r--r--health/health.d/cgroups.conf8
-rw-r--r--health/health.d/cockroachdb.conf16
-rw-r--r--health/health.d/cpu.conf16
-rw-r--r--health/health.d/dbengine.conf16
-rw-r--r--health/health.d/disks.conf16
-rw-r--r--health/health.d/dns_query.conf4
-rw-r--r--health/health.d/dnsmasq_dhcp.conf4
-rw-r--r--health/health.d/dockerd.conf4
-rw-r--r--health/health.d/elasticsearch.conf4
-rw-r--r--health/health.d/entropy.conf4
-rw-r--r--health/health.d/exporting.conf29
-rw-r--r--health/health.d/fping.conf16
-rw-r--r--health/health.d/fronius.conf4
-rw-r--r--health/health.d/gearman.conf4
-rw-r--r--health/health.d/go.d.plugin.conf4
-rw-r--r--health/health.d/haproxy.conf12
-rw-r--r--health/health.d/hdfs.conf20
-rw-r--r--health/health.d/httpcheck.conf32
-rw-r--r--health/health.d/ioping.conf4
-rw-r--r--health/health.d/ipc.conf8
-rw-r--r--health/health.d/ipfs.conf4
-rw-r--r--health/health.d/ipmi.conf8
-rw-r--r--health/health.d/kubelet.conf36
-rw-r--r--health/health.d/linux_power_supply.conf4
-rw-r--r--health/health.d/load.conf16
-rw-r--r--health/health.d/mdstat.conf16
-rw-r--r--health/health.d/megacli.conf20
-rw-r--r--health/health.d/memcached.conf12
-rw-r--r--health/health.d/memory.conf12
-rw-r--r--health/health.d/mysql.conf44
-rw-r--r--health/health.d/net.conf56
-rw-r--r--health/health.d/netfilter.conf4
-rw-r--r--health/health.d/pihole.conf16
-rw-r--r--health/health.d/portcheck.conf12
-rw-r--r--health/health.d/processes.conf4
-rw-r--r--health/health.d/python.d.plugin.conf4
-rw-r--r--health/health.d/ram.conf20
-rw-r--r--health/health.d/redis.conf8
-rw-r--r--health/health.d/retroshare.conf4
-rw-r--r--health/health.d/riakkv.conf24
-rw-r--r--health/health.d/scaleio.conf8
-rw-r--r--health/health.d/softnet.conf12
-rw-r--r--health/health.d/stiebeleltron.conf4
-rw-r--r--health/health.d/swap.conf8
-rw-r--r--health/health.d/systemdunits.conf40
-rw-r--r--health/health.d/tcp_conn.conf4
-rw-r--r--health/health.d/tcp_listen.conf16
-rw-r--r--health/health.d/tcp_mem.conf4
-rw-r--r--health/health.d/tcp_orphans.conf4
-rw-r--r--health/health.d/tcp_resets.conf16
-rw-r--r--health/health.d/timex.conf4
-rw-r--r--health/health.d/udp_errors.conf8
-rw-r--r--health/health.d/unbound.conf8
-rw-r--r--health/health.d/varnish.conf4
-rw-r--r--health/health.d/vcsa.conf32
-rw-r--r--health/health.d/vernemq.conf104
-rw-r--r--health/health.d/vsphere.conf44
-rw-r--r--health/health.d/web_log.conf96
-rw-r--r--health/health.d/whoisquery.conf4
-rw-r--r--health/health.d/wmi.conf32
-rw-r--r--health/health.d/x509check.conf8
-rw-r--r--health/health.d/zfs.conf12
73 files changed, 591 insertions, 577 deletions
diff --git a/health/REFERENCE.md b/health/REFERENCE.md
index 5ea6b7c5d6..456912ea70 100644
--- a/health/REFERENCE.md
+++ b/health/REFERENCE.md
@@ -59,9 +59,9 @@ Netdata parses the following lines. Beneath the table is an in-depth explanation
| --------------------------------------------------- | --------------- | ------------------------------------------------------------------------------------- |
| [`alarm`/`template`](#alarm-line-alarm-or-template) | yes | Name of the alarm/template. |
| [`on`](#alarm-line-on) | yes | The chart this alarm should attach to. |
-| [`class`](#alarm-line-class) | no | The general classification of the alarm. |
-| [`component`](#alarm-line-component) | no | Specify the component of the class of the alarm. |
-| [`type`](#alarm-line-type) | no | The type of error the alarm monitors. |
+| [`class`](#alarm-line-class) | no | The general alarm classification. |
+| [`type`](#alarm-line-type) | no | What area of the system the alarm monitors. |
+| [`component`](#alarm-line-component) | no | Specific component of the type of the alarm. |
| [`os`](#alarm-line-os) | no | Which operating systems to run this chart. |
| [`hosts`](#alarm-line-hosts) | no | Which hostnames will run this alarm. |
| [`plugin`](#alarm-line-plugin) | no | Restrict an alarm or template to only a certain plugin. |
@@ -136,17 +136,38 @@ If you create a template using the `disk.io` context, it will apply an alarm to
#### Alarm line `class`
-Specify the classification of the alarm or template.
+This indicates the kind of error (or general problem area) that the alarm or template applies to. For example, `Latency` can be used for alarms that trigger on latency issues on network interfaces, web servers, or database systems. Example:
-Class can be used to indicate the broader area of the system that the alarm applies to. For example, under the general `Database` class, you can group together alarms that operate on various database systems, like `MySQL`, `CockroachDB`, `CouchDB` etc. Example:
+```yaml
+class: Latency
+```
+
+<details>
+<summary>Netdata's stock alarms use the following `class` attributes by default:</summary>
+
+| Class |
+| ----------------|
+| Errors |
+| Latency |
+| Utilization |
+| Workload |
+
+
+</details>
+
+`class` will default to `Unknown` if the line is missing from the alarm configuration.
+
+#### Alarm line `type`
+
+Type can be used to indicate the broader area of the system that the alarm applies to. For example, under the general `Database` type, you can group together alarms that operate on various database systems, such as `MySQL`, `CockroachDB`, and `CouchDB`. Example:
```yaml
-class: Database
+type: Database
```
<details>
-<summary>Netdata's stock alarms use the following `class` attributes by default, but feel free to adjust for your own requirements.</summary>
+<summary>Netdata's stock alarms use the following `type` attributes by default, but feel free to adjust for your own requirements.</summary>
-| Class | Description |
+| Type | Description |
| ------------------------ | ------------------------------------------------------------------------------------------------ |
| Ad Filtering | Services related to Ad Filtering (like pi-hole) |
| Certificates | Certificates monitoring related |
@@ -162,7 +183,7 @@ class: Database
| Linux | Services specific to Linux (e.g. systemd) |
| Messaging | Alerts for message passing services (e.g. vernemq) |
| Netdata | Internal Netdata components monitoring |
-| Other | Use as a general class of alerts |
+| Other | When an alert doesn't fit in other types. |
| Power Supply | Alerts from power supply related services (e.g. apcupsd) |
| Search engine | Alerts for search services (e.g. elasticsearch) |
| Storage | Class for alerts dealing with storage services (storage devices typically live under `System`) |
@@ -174,26 +195,16 @@ class: Database
</details>
-If an alarm configuration is missing the `class` line, its value will default to `Unknown`.
+If an alarm configuration is missing the `type` line, its value will default to `Unknown`.
#### Alarm line `component`
-Component can be used to narrow down what the previous `class` value specifies for each alarm or template. Continuing from the previous example, `component` might include `MySQL`, `CockroachDB`, `MongoDB`, all under the same `Database` classification. Example:
+Component can be used to narrow down what the previous `type` value specifies for each alarm or template. Continuing from the previous example, `component` might include `MySQL`, `CockroachDB`, `MongoDB`, all under the same `Database` type. Example:
```yaml
component: MySQL
```
-As with the `class` line, if `component` is missing from the configuration, its value will default to `Unknown`.
-
-#### Alarm line `type`
-
-This indicates the type of error (or general problem area) that the alarm or template applies to. For example, `Latency` can be used for alarms that trigger on latency issues in network interfaces, web servers, or database systems. Example:
-
-```yaml
-type: Latency
-```
-
-`type` will also (as with `class` and `component`) default to `Unknown` if the line is missing from the alarm configuration.
+As with the `class` and `type` lines, if `component` is missing from the configuration, its value will default to `Unknown`.
#### Alarm line `os`
diff --git a/health/health.d/adaptec_raid.conf b/health/health.d/adaptec_raid.conf
index b067e18401..1d823adddd 100644
--- a/health/health.d/adaptec_raid.conf
+++ b/health/health.d/adaptec_raid.conf
@@ -3,9 +3,9 @@
template: adaptec_raid_ld_status
on: adaptec_raid.ld_status
- class: System
+ class: Errors
+ type: System
component: RAID
- type: Errors
lookup: max -10s foreach *
units: bool
every: 10s
@@ -18,9 +18,9 @@ component: RAID
template: adaptec_raid_pd_state
on: adaptec_raid.pd_state
- class: System
+ class: Errors
+ type: System
component: RAID
- type: Errors
lookup: max -10s foreach *
units: bool
every: 10s
diff --git a/health/health.d/anomalies.conf b/health/health.d/anomalies.conf
index f27e39fc10..269ae544b0 100644
--- a/health/health.d/anomalies.conf
+++ b/health/health.d/anomalies.conf
@@ -2,9 +2,9 @@
template: anomalies_anomaly_probabilities
on: anomalies.probability
- class: Netdata
+ class: Errors
+ type: Netdata
component: ML
- type: Errors
lookup: average -2m foreach *
every: 1m
warn: $this > 50
@@ -14,9 +14,9 @@ component: ML
template: anomalies_anomaly_flags
on: anomalies.anomaly
- class: Netdata
+ class: Errors
+ type: Netdata
component: ML
- type: Errors
lookup: sum -2m foreach *
every: 1m
warn: $this > 10
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf
index 07b5c28c97..65f1a69ab9 100644
--- a/health/health.d/apcupsd.conf
+++ b/health/health.d/apcupsd.conf
@@ -2,9 +2,9 @@
template: apcupsd_10min_ups_load
on: apcupsd.load
- class: Power Supply
+ class: Utilization
+ type: Power Supply
component: UPS
- type: Utilization
os: *
hosts: *
lookup: average -10m unaligned of percentage
@@ -20,9 +20,9 @@ component: UPS
# Fire the alarm as soon as it's going on battery (99% charge) and clear only when full.
template: apcupsd_ups_charge
on: apcupsd.charge
- class: Power Supply
+ class: Errors
+ type: Power Supply
component: UPS
- type: Errors
os: *
hosts: *
lookup: average -60s unaligned of charge
@@ -36,9 +36,9 @@ component: UPS
template: apcupsd_last_collected_secs
on: apcupsd.load
- class: Power Supply
+ class: Latency
+ type: Power Supply
component: UPS device
- type: Latency
calc: $now - $last_collected_t
every: 10s
units: seconds ago
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf
index 948ea551a0..91d469395e 100644
--- a/health/health.d/backend.conf
+++ b/health/health.d/backend.conf
@@ -1,9 +1,9 @@
# Alert that backends subsystem will be disabled soon
alarm: backend_metrics_eol
on: netdata.backend_metrics
- class: Netdata
+ class: Errors
+ type: Netdata
component: Exporting engine
- type: Errors
units: boolean
calc: $now - $last_collected_t
every: 1m
@@ -16,9 +16,9 @@ component: Exporting engine
alarm: backend_last_buffering
on: netdata.backend_metrics
- class: Netdata
+ class: Latency
+ type: Netdata
component: Exporting engine
- type: Latency
calc: $now - $last_collected_t
units: seconds ago
every: 10s
@@ -30,9 +30,9 @@ component: Exporting engine
alarm: backend_metrics_sent
on: netdata.backend_metrics
- class: Netdata
+ class: Workload
+ type: Netdata
component: Exporting engine
- type: Workload
units: %
calc: abs($sent) * 100 / abs($buffered)
every: 10s
diff --git a/health/health.d/bcache.conf b/health/health.d/bcache.conf
index d75d8e19b0..49cb5ad0f6 100644
--- a/health/health.d/bcache.conf
+++ b/health/health.d/bcache.conf
@@ -1,9 +1,9 @@
template: bcache_cache_errors
on: disk.bcache_cache_read_races
- class: System
+ class: Errors
+ type: System
component: Disk
- type: Errors
lookup: sum -1m unaligned absolute
units: errors
every: 1m
@@ -16,9 +16,9 @@ component: Disk
template: bcache_cache_dirty
on: disk.bcache_cache_alloc
- class: System
+ class: Utilization
+ type: System
component: Disk
- type: Utilization
calc: $dirty + $metadata + $undefined
units: %
every: 1m
diff --git a/health/health.d/beanstalkd.conf b/health/health.d/beanstalkd.conf
index 99c7545710..13ac8c1825 100644
--- a/health/health.d/beanstalkd.conf
+++ b/health/health.d/beanstalkd.conf
@@ -2,9 +2,9 @@
template: beanstalk_server_buried_jobs
on: beanstalk.current_jobs
- class: Messaging
+ class: Workload
+ type: Messaging
component: Beanstalk
- type: Workload
calc: $buried
units: jobs
every: 10s
diff --git a/health/health.d/bind_rndc.conf b/health/health.d/bind_rndc.conf
index e88f87a4fa..7c09225ff6 100644
--- a/health/health.d/bind_rndc.conf
+++ b/health/health.d/bind_rndc.conf
@@ -1,8 +1,8 @@
template: bind_rndc_stats_file_size
on: bind_rndc.stats_size
- class: DNS
+ class: Utilization
+ type: DNS
component: BIND
- type: Utilization
units: megabytes
every: 60
calc: $stats_size
diff --git a/health/health.d/boinc.conf b/health/health.d/boinc.conf
index 8604abee96..7d7a4fdae3 100644
--- a/health/health.d/boinc.conf
+++ b/health/health.d/boinc.conf
@@ -3,9 +3,9 @@
# Warn on any compute errors encountered.
template: boinc_compute_errors
on: boinc.states
- class: Computing
+ class: Errors
+ type: Computing
component: BOINC
- type: Errors
os: *
hosts: *
families: *
@@ -21,9 +21,9 @@ component: BOINC
# Warn on lots of upload errors
template: boinc_upload_errors
on: boinc.states
- class: Computing
+ class: Errors
+ type: Computing
component: BOINC
- type: Errors
os: *
hosts: *
families: *
@@ -39,9 +39,9 @@ component: BOINC
# Warn on the task queue being empty
template: boinc_total_tasks
on: boinc.tasks
- class: Computing
+ class: Utilization
+ type: Computing
component: BOINC
- type: Utilization
os: *
hosts: *
families: *
@@ -57,9 +57,9 @@ component: BOINC
# Warn on no active tasks with a non-empty queue
template: boinc_active_tasks
on: boinc.tasks
- class: Computing
+ class: Utilization
+ type: Computing
component: BOINC
- type: Utilization
os: *
hosts: *
families: *
diff --git a/health/health.d/btrfs.conf b/health/health.d/btrfs.conf
index d3200a7eee..8d197aa8d2 100644
--- a/health/health.d/btrfs.conf
+++ b/health/health.d/btrfs.conf
@@ -1,9 +1,9 @@
template: btrfs_allocated
on: btrfs.disk
- class: System
+ class: Utilization
+ type: System
component: File system
- type: Utilization
os: *
hosts: *
families: *
@@ -18,9 +18,9 @@ component: File system
template: btrfs_data
on: btrfs.data
- class: System
+ class: Utilization
+ type: System
component: File system
- type: Utilization
os: *
hosts: *
families: *
@@ -35,9 +35,9 @@ component: File system
template: btrfs_metadata
on: btrfs.metadata
- class: System
+ class: Utilization
+ type: System
component: File system
- type: Utilization
os: *
hosts: *
families: *
@@ -52,9 +52,9 @@ component: File system
template: btrfs_system
on: btrfs.system
- class: System
+ class: