remove "os" "hosts" "plugin" and "module" from stock alarms (#17113)

author: Ilya Mashchenko <ilya@netdata.cloud> 2024-03-05 22:37:12 +0200
committer: GitHub <noreply@github.com> 2024-03-05 22:37:12 +0200
commit: 746ebfdbd20045a6e72057736d57820caed73e5b (patch)
tree: c08d04d4c817800fc5ea247ac0c38fffe375e2bd
parent: 291004e39ba61d34f3c89f2ef30143e880a0ee78 (diff)
35 files changed, 1124 insertions, 1306 deletions
diff --git a/src/health/health.d/apcupsd.conf b/src/health/health.d/apcupsd.conf
index 90a72af192..5fd7aa1126 100644
--- a/src/health/health.d/apcupsd.conf
+++ b/src/health/health.d/apcupsd.conf
@@ -5,8 +5,6 @@
     class: Utilization
      type: Power Supply
 component: UPS
-       os: *
-    hosts: *
    lookup: average -10m unaligned of percentage
     units: %
     every: 1m
@@ -23,8 +21,6 @@ component: UPS
     class: Errors
      type: Power Supply
 component: UPS
-       os: *
-    hosts: *
    lookup: average -60s unaligned of charge
     units: %
     every: 60s
diff --git a/src/health/health.d/boinc.conf b/src/health/health.d/boinc.conf
index 092a568450..6fd987de19 100644
--- a/src/health/health.d/boinc.conf
+++ b/src/health/health.d/boinc.conf
@@ -1,4 +1,4 @@
-# Alarms for various BOINC issues.
+# you can disable an alarm notification by setting the 'to' line to: silent
 
 # Warn on any compute errors encountered.
  template: boinc_compute_errors
@@ -6,8 +6,6 @@
     class: Errors
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of comperror
     units: tasks
     every: 1m
@@ -23,8 +21,6 @@ component: BOINC
     class: Errors
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of upload_failed
     units: tasks
     every: 1m
@@ -40,8 +36,6 @@ component: BOINC
     class: Utilization
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of total
     units: tasks
     every: 1m
@@ -57,8 +51,6 @@ component: BOINC
     class: Utilization
      type: Computing
 component: BOINC
-       os: *
-    hosts: *
    lookup: average -10m unaligned of active
      calc: ($boinc_total_tasks >= 1) ? ($this) : (inf)
     units: tasks
diff --git a/src/health/health.d/btrfs.conf b/src/health/health.d/btrfs.conf
index 1557a59410..f43f600c05 100644
--- a/src/health/health.d/btrfs.conf
+++ b/src/health/health.d/btrfs.conf
@@ -1,11 +1,10 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
 
  template: btrfs_allocated
        on: btrfs.disk
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
     units: %
     every: 10s
@@ -20,8 +19,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: $used * 100 / ($used + $free)
     units: %
     every: 10s
@@ -37,8 +34,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
     units: %
     every: 10s
@@ -54,8 +49,6 @@ component: File system
     class: Utilization
      type: System
 component: File system
-       os: *
-    hosts: *
      calc: $used * 100 / ($used + $free)
     units: %
     every: 10s
@@ -71,8 +64,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of read_errs
      warn: $this > 0
@@ -86,8 +77,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of write_errs
      crit: $this > 0
@@ -101,8 +90,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of flush_errs
      crit: $this > 0
@@ -116,8 +103,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of corruption_errs
      warn: $this > 0
@@ -131,8 +116,6 @@ component: File system
     class: Errors
      type: System
 component: File system
-       os: *
-    hosts: *
     units: errors
    lookup: max -10m every 1m of generation_errs
      warn: $this > 0
diff --git a/src/health/health.d/cgroups.conf b/src/health/health.d/cgroups.conf
index 9c55633efb..52ca026242 100644
--- a/src/health/health.d/cgroups.conf
+++ b/src/health/health.d/cgroups.conf
@@ -1,72 +1,67 @@
-
 # you can disable an alarm notification by setting the 'to' line to: silent
 
- template: cgroup_10min_cpu_usage
-       on: cgroup.cpu_limit
-    class: Utilization
-     type: Cgroups
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Cgroup ${label:cgroup_name} CPU utilization
-     info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
-       to: silent
+   template: cgroup_10min_cpu_usage
+         on: cgroup.cpu_limit
+      class: Utilization
+       type: Cgroups
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > (($status == $CRITICAL) ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Cgroup ${label:cgroup_name} CPU utilization
+       info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
+         to: silent
 
- template: cgroup_ram_in_use
-       on: cgroup.mem_usage
-    class: Utilization
-     type: Cgroups
-component: Memory
-       os: linux
-    hosts: *
-     calc: ($ram) * 100 / $memory_limit
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Cgroup ${label:cgroup_name} memory utilization
-     info: Cgroup ${label:cgroup_name} memory utilization
-       to: silent
+   template: cgroup_ram_in_use
+         on: cgroup.mem_usage
+      class: Utilization
+       type: Cgroups
+  component: Memory
+host labels: _os=linux
+       calc: ($ram) * 100 / $memory_limit
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Cgroup ${label:cgroup_name} memory utilization
+       info: Cgroup ${label:cgroup_name} memory utilization
+         to: silent
 
 # ---------------------------------K8s containers--------------------------------------------
 
- template: k8s_cgroup_10min_cpu_usage
-       on: k8s.cgroup.cpu_limit
-    class: Utilization
-     type: Cgroups
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
-     info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
-           average CPU utilization over the last 10 minutes
-       to: silent
+   template: k8s_cgroup_10min_cpu_usage
+         on: k8s.cgroup.cpu_limit
+      class: Utilization
+       type: Cgroups
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
+       info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+             average CPU utilization over the last 10 minutes
+         to: silent
 
- template: k8s_cgroup_ram_in_use
-       on: k8s.cgroup.mem_usage
-    class: Utilization
-     type: Cgroups
-component: Memory
-       os: linux
-    hosts: *
-     calc: ($ram) * 100 / $memory_limit
-    units: %
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
-     info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
-           memory utilization
-       to: silent
+   template: k8s_cgroup_ram_in_use
+         on: k8s.cgroup.mem_usage
+      class: Utilization
+       type: Cgroups
+  component: Memory
+host labels: _os=linux
+       calc: ($ram) * 100 / $memory_limit
+      units: %
+      every: 10s
+       warn: $this > (($status >= $WARNING)  ? (80) : (90))
+       crit: $this > (($status == $CRITICAL) ? (90) : (98))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
+       info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+             memory utilization
+         to: silent
diff --git a/src/health/health.d/cpu.conf b/src/health/health.d/cpu.conf
index 0b007d6b4b..a3a05855a3 100644
--- a/src/health/health.d/cpu.conf
+++ b/src/health/health.d/cpu.conf
@@ -1,69 +1,65 @@
 
 # you can disable an alarm notification by setting the 'to' line to: silent
 
- template: 10min_cpu_usage
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned of user,system,softirq,irq,guest
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System CPU utilization
-     info: Average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
-       to: silent
+   template: 10min_cpu_usage
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned of user,system,softirq,irq,guest
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+       crit: $this > (($status == $CRITICAL) ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System CPU utilization
+       info: Average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
+         to: silent
 
- template: 10min_cpu_iowait
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned of iowait
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (20) : (40))
-    delay: up 30m down 30m multiplier 1.5 max 2h
-  summary: System CPU iowait time
-     info: Average CPU iowait time over the last 10 minutes
-       to: silent
+   template: 10min_cpu_iowait
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -10m unaligned of iowait
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (20) : (40))
+      delay: up 30m down 30m multiplier 1.5 max 2h
+    summary: System CPU iowait time
+       info: Average CPU iowait time over the last 10 minutes
+         to: silent
 
- template: 20min_steal_cpu
-       on: system.cpu
-    class: Latency
-     type: System
-component: CPU
-       os: linux
-    hosts: *
-   lookup: average -20m unaligned of steal
-    units: %
-    every: 5m
-     warn: $this > (($status >= $WARNING)  ? (5)  : (10))
-    delay: down 1h multiplier 1.5 max 2h
-  summary: System CPU steal time
-     info: Average CPU steal time over the last 20 minutes
-       to: silent
+   template: 20min_steal_cpu
+         on: system.cpu
+      class: Latency
+       type: System
+  component: CPU
+host labels: _os=linux
+     lookup: average -20m unaligned of steal
+      units: %
+      every: 5m
+       warn: $this > (($status >= $WARNING)  ? (5)  : (10))
+      delay: down 1h multiplier 1.5 max 2h
+    summary: System CPU steal time
+       info: Average CPU steal time over the last 20 minutes
+         to: silent
 
 ## FreeBSD
- template: 10min_cpu_usage
-       on: system.cpu
-    class: Utilization
-     type: System
-component: CPU
-       os: freebsd
-    hosts: *
-   lookup: average -10m unaligned of user,system,interrupt
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (75) : (85))
-     crit: $this > (($status == $CRITICAL) ? (85) : (95))
-    delay: down 15m multiplier 1.5 max 1h
-  summary: System CPU utilization
-     info: Average CPU utilization over the last 10 minutes (excluding nice)
-       to: silent
+   template: 10min_cpu_usage
+         on: system.cpu
+      class: Utilization
+       type: System
+  component: CPU
+host labels: _os=freebsd
+     lookup: average -10m unaligned of user,system,interrupt
+      units: %
+      every: 1m
+       warn: $this > (($status >= $WARNING)  ? (75) : (85))
+       crit: $this > (($status == $CRITICAL) ? (85) : (95))
+      delay: down 15m multiplier 1.5 max 1h
+    summary: System CPU utilization
+       info: Average CPU utilization over the last 10 minutes (excluding nice)
+         to: silent
diff --git a/src/health/health.d/dbengine.conf b/src/health/health.d/dbengine.conf
index 0a70d2e8f9..5585a95335 100644
--- a/src/health/health.d/dbengine.conf
+++ b/src/health/health.d/dbengine.conf
@@ -1,4 +1,3 @@
-
 # you can disable an alarm notification by setting the 'to' line to: silent
 
     alarm: 10min_dbengine_global_fs_errors
@@ -6,8 +5,6 @@
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of fs_errors
     units: errors
     every: 10s
@@ -22,8 +19,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of io_errors
     units: errors
     every: 10s
@@ -38,8 +33,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of pg_cache_over_half_dirty_events
     units: errors
     every: 10s
@@ -55,8 +48,6 @@ component: DB engine
     class: Errors
      type: Netdata
 component: DB engine
-       os: linux freebsd macos
-    hosts: *
    lookup: sum -10m unaligned of flushing_pressure_deletions
     units: pages
     every: 10s
diff --git a/src/health/health.d/disks.conf b/src/health/health.d/disks.conf
index 2e417fd4a3..fe96837fbc 100644
--- a/src/health/health.d/disks.conf
+++ b/src/health/health.d/disks.conf
@@ -1,7 +1,5 @@
-
 # you can disable an alarm notification by setting the 'to' line to: silent
 
-
 # -----------------------------------------------------------------------------
 # low disk space
 
@@ -9,41 +7,39 @@
 # raise an alarm if the disk is low on
 # available disk space
 
- template: disk_space_usage
-       on: disk.space
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
+    template: disk_space_usage
+          on: disk.space
+       class: Utilization
+        type: System
+   component: Disk
+ host labels: _os=linux freebsd
 chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
-     calc: $used * 100 / ($avail + $used)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING ) ? (80) : (90))
-     crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: Disk ${label:mount_point} space usage
-     info: Total space utilization of disk ${label:mount_point}
-       to: sysadmin
-
- template: disk_inode_usage
-       on: disk.inodes
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
+        calc: $used * 100 / ($avail + $used)
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING ) ? (80) : (90))
+        crit: ($this > (($status == $CRITICAL) ? (90) : (98))) && $avail < 5
+       delay: up 1m down 15m multiplier 1.5 max 1h
+     summary: Disk ${label:mount_point} space usage
+        info: Total space utilization of disk ${label:mount_point}
+          to: sysadmin
+
+    template: disk_inode_usage
+          on: disk.inodes
+       class: Utilization
+        type: System
+   component: Disk
+ host labels: _os=linux freebsd
 chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
-     calc: $used * 100 / ($avail + $used)
-    units: %
-    every: 1m
-     warn: $this > (($status >= $WARNING)  ? (80) : (90))
-     crit: $this > (($status == $CRITICAL) ? (90) : (98))
-    delay: up 1m down 15m multiplier 1.5 max 1h
-  summary: Disk ${label:mount_point} inode usage
-     info: Total inode utilization of disk ${label:mount_point}
-       to: sysadmin
+        calc: $used * 100 / ($avail + $used)
+       units: %
+       every: 1m
+        warn: $this > (($status >= $WARNING)  ? (80) : (90))
+        crit: $this > (($status == $CRITICAL) ? (90) : (98))
+       delay: up 1m down 15m multiplier 1.5 max 1h
+     summary: Disk ${label:mount_point} inode usage
+        info: Total inode utilization of disk ${label:mount_point}
+          to: sysadmin
 
 
 # -----------------------------------------------------------------------------
@@ -57,33 +53,30 @@ chart labels: mount_point=!/dev !/dev/* !/run !/run/* *
 # we will use it in the next template to find
 # the hours remaining
 
-template: disk_fill_rate
-      on: disk.space
-      os: linux freebsd
-   hosts: *
-  lookup: min -10m at -50m unaligned of avail
-    calc: ($this - $avail) / (($now - $after) / 3600)
-   every: 1m
-   units: GB/hour
-    info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
+   template: disk_fill_rate
+         on: disk.space
+host labels: _os=linux freebsd
+     lookup: min -10m at -50m unaligned of avail
+       calc: ($this - $avail) / (($now - $after) / 3600)
+      every: 1m
+      units: GB/hour
+       info: average rate the disk fills up (positive), or frees up (negative) space, for the last hour
 
 # calculate the hours remaining
-# if the disk continues to fill
-# in this rate
-
-template: out_of_disk_space_time
-      on: disk.space
-      os: linux freebsd
-   hosts: *
-    calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
-   units: hours
-   every: 10s
-    warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
-    crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-   delay: down 15m multiplier 1.2 max 1h
- summary: Disk ${label:mount_point} estimation of lack of space
-    info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour
-      to: silent
+# if the disk continues to fill in this rate
+
+   template: out_of_disk_space_time
+         on: disk.space
+host labels: _os=linux freebsd
+       calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
+      units: hours
+      every: 10s
+       warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
+       crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:mount_point} estimation of lack of space
+       info: Estimated time the disk ${label:mount_point} will run out of space, if the system continues to add data with the rate of the last hour
+         to: silent
 
 
 # -----------------------------------------------------------------------------
@@ -97,33 +90,31 @@ template: out_of_disk_space_time
 # we will use it in the next template to find
 # the hours remaining
 
-template: disk_inode_rate
-      on: disk.inodes
-      os: linux freebsd
-   hosts: *
-  lookup: min -10m at -50m unaligned of avail
-    calc: ($this - $avail) / (($now - $after) / 3600)
-   every: 1m
-   units: inodes/hour
-    info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
+   template: disk_inode_rate
+         on: disk.inodes
+host labels: _os=linux freebsd
+     lookup: min -10m at -50m unaligned of avail
+       calc: ($this - $avail) / (($now - $after) / 3600)
+      every: 1m
+      units: inodes/hour
+       info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
 
 # calculate the hours remaining
 # if the disk inodes are allocated
 # in this rate
 
-template: out_of_disk_inodes_time
-      on: disk.inodes
-      os: linux freebsd
-   hosts: *
-    calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
-   units: hours
-   every: 10s
-    warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
-    crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
-   delay: down 15m multiplier 1.2 max 1h
- summary: Disk ${label:mount_point} estimation of lack of inodes
-    info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
-      to: silent
+   template: out_of_disk_inodes_time
+         on: disk.inodes
+host labels: _os=linux freebsd
+       calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
+      units: hours
+      every: 10s
+       warn: $this > 0 and $this < (($status >= $WARNING)  ? (48) : (8))
+       crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:mount_point} estimation of lack of inodes
+       info: Estimated time the disk ${label:mount_point} will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
+         to: silent
 
 
 # -----------------------------------------------------------------------------
@@ -133,21 +124,20 @@ template: out_of_disk_inodes_time
 # by calculating the average disk utilization
 # for the last 10 minutes
 
- template: 10min_disk_utilization
-       on: disk.util
-    class: Utilization
-     type: System
-component: Disk
-       os: linux freebsd
-    hosts: *
-   lookup: average -10m unaligned
-    units: %
-    every: 1m
-     warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
-    delay: down 15m multiplier 1.2 max 1h
-  summary: Disk ${label:device} utilization
-     info: Average percentage of time ${label:device} disk was busy over the last 10 minutes
-       to: silent
+   template: 10min_disk_utilization
+         on: disk.util
+      class: Utilization
+       type: System
+  component: Disk
+host labels: _os=linux freebsd
+     lookup: average -10m unaligned
+      units: %
+      every: 1m
+       warn: $this > 98 * (($status >= $WARNING)  ? (0.7) : (1))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:device} utilization
+       info: Average percentage of time ${label:device} disk was busy over the last 10 minutes
+         to: silent
 
 
 # raise an alarm if the disk backlog
@@ -155,18 +145,17 @@ component: Disk
 # for 10 minutes
 # (i.e. the disk cannot catch up)
 
- template: 10min_disk_backlog
-       on: disk.backlog
-    class: Latency
-     type: System
-component: Disk
-       os: linux
-    hosts: *
-   lookup: average -10m unaligned
-    units: ms
-    every: 1m
-     warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
-    delay: down 15m multiplier 1.2 max 1h
-  summary: Disk ${label:device} backlog
-     info: Average backlog size of the ${label:device} disk over the last 10 minutes
-       to: silent
+   template: 10min_disk_backlog
+         on: disk.backlog
+      class: Latency
+       type: System
+  component: Disk
+host labels: _os=linux
+     lookup: average -10m unaligned
+      units: ms
+      every: 1m
+       warn: $this > 5000 * (($status >= $WARNING)  ? (0.7) : (1))
+      delay: down 15m multiplier 1.2 max 1h
+    summary: Disk ${label:device} backlog
+       info: Average backlog size of the ${label:device} disk over the last 10 minutes
+         to: silent
diff --git a/src/health/health.d/entropy.conf b/src/health/health.d/entropy.conf
author	Ilya Mashchenko <ilya@netdata.cloud>	2024-03-05 22:37:12 +0200
committer	GitHub <noreply@github.com>	2024-03-05 22:37:12 +0200
commit	746ebfdbd20045a6e72057736d57820caed73e5b (patch)
tree	c08d04d4c817800fc5ea247ac0c38fffe375e2bd
parent	291004e39ba61d34f3c89f2ef30143e880a0ee78 (diff)