From 2555dccae77ebe4ac132c13feca610be1dd26857 Mon Sep 17 00:00:00 2001
From: Ilya Mashchenko <ilya@netdata.cloud>
Date: Mon, 24 May 2021 14:42:49 +0300
Subject: health: add python.d/go.d jobs last_collected_secs alarms (#11168)

---
 health/Makefile.am                   | 15 ++-------------
 health/health.d/am2320.conf          | 15 ---------------
 health/health.d/apache.conf          | 17 -----------------
 health/health.d/cockroachdb.conf     | 16 ----------------
 health/health.d/couchdb.conf         | 16 ----------------
 health/health.d/gearman.conf         | 14 --------------
 health/health.d/go.d.plugin.conf     | 17 +++++++++++++++++
 health/health.d/hdfs.conf            | 17 -----------------
 health/health.d/httpcheck.conf       | 14 --------------
 health/health.d/lighttpd.conf        | 17 -----------------
 health/health.d/memcached.conf       | 17 -----------------
 health/health.d/mongodb.conf         | 16 ----------------
 health/health.d/mysql.conf           | 18 ------------------
 health/health.d/named.conf           | 17 -----------------
 health/health.d/nginx.conf           | 17 -----------------
 health/health.d/nginx_plus.conf      | 17 -----------------
 health/health.d/phpfpm.conf          | 17 -----------------
 health/health.d/pihole.conf          | 16 ----------------
 health/health.d/portcheck.conf       | 14 --------------
 health/health.d/postgres.conf        | 16 ----------------
 health/health.d/pulsar.conf          | 16 ----------------
 health/health.d/python.d.plugin.conf | 17 +++++++++++++++++
 health/health.d/redis.conf           | 16 ----------------
 health/health.d/retroshare.conf      | 15 ---------------
 health/health.d/riakkv.conf          | 14 --------------
 health/health.d/scaleio.conf         | 16 ----------------
 health/health.d/squid.conf           | 17 -----------------
 health/health.d/unbound.conf         | 16 ----------------
 health/health.d/vcsa.conf            | 16 ----------------
 health/health.d/vernemq.conf         | 16 ----------------
 health/health.d/web_log.conf         | 35 -----------------------------------
 health/health.d/whoisquery.conf      | 17 -----------------
 health/health.d/wmi.conf             | 18 ------------------
 health/health.d/x509check.conf       | 17 -----------------
 health/health.d/zookeeper.conf       | 17 -----------------
 35 files changed, 36 insertions(+), 550 deletions(-)
 delete mode 100644 health/health.d/am2320.conf
 delete mode 100644 health/health.d/apache.conf
 delete mode 100644 health/health.d/couchdb.conf
 create mode 100644 health/health.d/go.d.plugin.conf
 delete mode 100644 health/health.d/lighttpd.conf
 delete mode 100644 health/health.d/mongodb.conf
 delete mode 100644 health/health.d/named.conf
 delete mode 100644 health/health.d/nginx.conf
 delete mode 100644 health/health.d/nginx_plus.conf
 delete mode 100644 health/health.d/phpfpm.conf
 delete mode 100644 health/health.d/postgres.conf
 delete mode 100644 health/health.d/pulsar.conf
 create mode 100644 health/health.d/python.d.plugin.conf
 delete mode 100644 health/health.d/squid.conf
 delete mode 100644 health/health.d/zookeeper.conf

diff --git a/health/Makefile.am b/health/Makefile.am
index b963ea0cd1..7b25c07cdd 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -25,9 +25,7 @@ install-exec-local:
 healthconfigdir=$(libconfigdir)/health.d
 dist_healthconfig_DATA = \
     health.d/adaptec_raid.conf \
-    health.d/am2320.conf \
     health.d/anomalies.conf \
-    health.d/apache.conf \
     health.d/apcupsd.conf \
     health.d/backend.conf \
     health.d/bcache.conf \
@@ -39,7 +37,6 @@ dist_healthconfig_DATA = \
     health.d/cgroups.conf \
     health.d/cpu.conf \
     health.d/cockroachdb.conf \
-    health.d/couchdb.conf \
     health.d/disks.conf \
     health.d/dnsmasq_dhcp.conf \
     health.d/dns_query.conf \
@@ -51,6 +48,7 @@ dist_healthconfig_DATA = \
     health.d/ioping.conf \
     health.d/fronius.conf \
     health.d/gearman.conf \
+    health.d/go.d.plugin.conf \
     health.d/haproxy.conf \
     health.d/hdfs.conf \
     health.d/httpcheck.conf \
@@ -59,26 +57,19 @@ dist_healthconfig_DATA = \
     health.d/ipmi.conf \
     health.d/isc_dhcpd.conf \
     health.d/kubelet.conf \
-    health.d/lighttpd.conf \
     health.d/linux_power_supply.conf \
     health.d/load.conf \
     health.d/mdstat.conf \
     health.d/megacli.conf \
     health.d/memcached.conf \
     health.d/memory.conf \
-    health.d/mongodb.conf \
     health.d/mysql.conf \
-    health.d/named.conf \
     health.d/net.conf \
     health.d/netfilter.conf \
-    health.d/nginx.conf \
-    health.d/nginx_plus.conf \
     health.d/pihole.conf \
-    health.d/phpfpm.conf \
     health.d/portcheck.conf \
-    health.d/postgres.conf \
     health.d/processes.conf \
-    health.d/pulsar.conf \
+    health.d/python.d.plugin.conf \
     health.d/qos.conf \
     health.d/ram.conf \
     health.d/redis.conf \
@@ -86,7 +77,6 @@ dist_healthconfig_DATA = \
     health.d/riakkv.conf \
     health.d/scaleio.conf \
     health.d/softnet.conf \
-    health.d/squid.conf \
     health.d/stiebeleltron.conf \
     health.d/synchronization.conf \
     health.d/swap.conf \
@@ -107,6 +97,5 @@ dist_healthconfig_DATA = \
     health.d/wmi.conf \
     health.d/x509check.conf \
     health.d/zfs.conf \
-    health.d/zookeeper.conf \
     health.d/dbengine.conf \
     $(NULL)
diff --git a/health/health.d/am2320.conf b/health/health.d/am2320.conf
deleted file mode 100644
index 4bac98fbbb..0000000000
--- a/health/health.d/am2320.conf
+++ /dev/null
@@ -1,15 +0,0 @@
-# make sure am2320 is sending stats
-
- template: am2320_last_collected_secs
-       on: am2320.temperature
-    class: Other
-component: Sensors
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
diff --git a/health/health.d/apache.conf b/health/health.d/apache.conf
deleted file mode 100644
index c623fb8801..0000000000
--- a/health/health.d/apache.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure apache is running
-
- template: apache_last_collected_secs
-       on: apache.requests
-    class: Web Server
-component: Apache
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/cockroachdb.conf b/health/health.d/cockroachdb.conf
index dccd2b0644..2c913a2cf8 100644
--- a/health/health.d/cockroachdb.conf
+++ b/health/health.d/cockroachdb.conf
@@ -1,20 +1,4 @@
 
-# Availability
-
- template: cockroachdb_last_collected_secs
-       on: cockroachdb.live_nodes
-    class: Database
-component: CockroachDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
 # Capacity
 
  template: cockroachdb_used_storage_capacity
diff --git a/health/health.d/couchdb.conf b/health/health.d/couchdb.conf
deleted file mode 100644
index c86c6b9887..0000000000
--- a/health/health.d/couchdb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure couchdb is running
-
- template: couchdb_last_collected_secs
-       on: couchdb.request_methods
-    class: Database
-component: CouchDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/gearman.conf b/health/health.d/gearman.conf
index e2031bf2b9..289e5fbb58 100644
--- a/health/health.d/gearman.conf
+++ b/health/health.d/gearman.conf
@@ -1,17 +1,3 @@
-# make sure Gearman is running
- template: gearman_last_collected_secs
-       on: gearman.total_jobs
-    class: Computing
-component: Gearman
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
  template: gearman_workers_queued
        on: gearman.single_job
diff --git a/health/health.d/go.d.plugin.conf b/health/health.d/go.d.plugin.conf
new file mode 100644
index 0000000000..ecd79c208f
--- /dev/null
+++ b/health/health.d/go.d.plugin.conf
@@ -0,0 +1,17 @@
+
+# Alert when a go.d.plugin data collection job stops updating its execution time chart (any module).
+# WARNING after 5x and CRITICAL after 60x $update_every without data; 1x keeps an already raised status.
+ template: go.d_job_last_collected_secs
+       on: netdata.go_plugin_execution_time
+    class: Netdata
+component: go.d.plugin
+     type: Error
+   module: *
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful data collection
+       to: webmaster
diff --git a/health/health.d/hdfs.conf b/health/health.d/hdfs.conf
index bd8308bedc..c67bf11dae 100644
--- a/health/health.d/hdfs.conf
+++ b/health/health.d/hdfs.conf
@@ -1,21 +1,4 @@
 
-# make sure hdfs is running
-
- template: hdfs_last_collected_secs
-       on: hdfs.heap_memory
-    class: Storage
-component: HDFS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # Common
 
  template: hdfs_capacity_usage
diff --git a/health/health.d/httpcheck.conf b/health/health.d/httpcheck.conf
index d4d6376a3f..39fc95a2c8 100644
--- a/health/health.d/httpcheck.conf
+++ b/health/health.d/httpcheck.conf
@@ -1,17 +1,3 @@
- template: httpcheck_last_collected_secs
- families: *
-       on: httpcheck.status
-    class: Other
-component: HTTP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: httpcheck_web_service_up
diff --git a/health/health.d/lighttpd.conf b/health/health.d/lighttpd.conf
deleted file mode 100644
index 0f067549e7..0000000000
--- a/health/health.d/lighttpd.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure lighttpd is running
-
- template: lighttpd_last_collected_secs
-       on: lighttpd.requests
-    class: Web Server
-component: Lighttpd
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/memcached.conf b/health/health.d/memcached.conf
index f4b734c381..1efad98a05 100644
--- a/health/health.d/memcached.conf
+++ b/health/health.d/memcached.conf
@@ -1,21 +1,4 @@
 
-# make sure memcached is running
-
- template: memcached_last_collected_secs
-       on: memcached.cache
-    class: KV Storage
-component: Memcached
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
 # detect if memcached cache is full
 
  template: memcached_cache_memory_usage
diff --git a/health/health.d/mongodb.conf b/health/health.d/mongodb.conf
deleted file mode 100644
index 8c9bdeb6fe..0000000000
--- a/health/health.d/mongodb.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure mongodb is running
-
- template: mongodb_last_collected_secs
-       on: mongodb.read_operations
-    class: Database
-component: MongoDB
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/mysql.conf b/health/health.d/mysql.conf
index 91860c4a77..a30e1b3bc3 100644
--- a/health/health.d/mysql.conf
+++ b/health/health.d/mysql.conf
@@ -1,22 +1,4 @@
 
-# make sure mysql is running
-
- template: mysql_last_collected_secs
-       on: mysql.queries
-    class: Database
-component: MySQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
-
-# -----------------------------------------------------------------------------
 # slow queries
 
  template: mysql_10s_slow_queries
diff --git a/health/health.d/named.conf b/health/health.d/named.conf
deleted file mode 100644
index 90266df167..0000000000
--- a/health/health.d/named.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure named is running
-
- template: named_last_collected_secs
-       on: named.global_queries
-    class: DNS
-component: BIND
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: domainadmin
-
diff --git a/health/health.d/nginx.conf b/health/health.d/nginx.conf
deleted file mode 100644
index 30c738f47d..0000000000
--- a/health/health.d/nginx.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure nginx is running
-
- template: nginx_last_collected_secs
-       on: nginx.requests
-    class: Web Server
-component: NGINX
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/nginx_plus.conf b/health/health.d/nginx_plus.conf
deleted file mode 100644
index 5849a9e7ed..0000000000
--- a/health/health.d/nginx_plus.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure nginx_plus is running
-
- template: nginx_plus_last_collected_secs
-       on: nginx_plus.requests_total
-    class: Web Server
-component: NGINX Plus
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/phpfpm.conf b/health/health.d/phpfpm.conf
deleted file mode 100644
index fc073a9442..0000000000
--- a/health/health.d/phpfpm.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure phpfpm is running
-
- template: phpfpm_last_collected_secs
-       on: phpfpm.requests
-    class: Web Server
-component: PHP-FPM
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
diff --git a/health/health.d/pihole.conf b/health/health.d/pihole.conf
index 72622caed2..429ff17849 100644
--- a/health/health.d/pihole.conf
+++ b/health/health.d/pihole.conf
@@ -1,20 +1,4 @@
 
-# Make sure Pi-hole is responding.
-
- template: pihole_last_collected_secs
-       on: pihole.dns_queries_total
-    class: Ad Filtering
-component: Pi-hole
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
 # Blocked DNS queries.
 
  template: pihole_blocked_queries
diff --git a/health/health.d/portcheck.conf b/health/health.d/portcheck.conf
index b977dbb310..d864b7b82b 100644
--- a/health/health.d/portcheck.conf
+++ b/health/health.d/portcheck.conf
@@ -1,17 +1,3 @@
- template: portcheck_last_collected_secs
- families: *
-       on: portcheck.status
-    class: Other
-component: TCP endpoint
-     type: Latency
-     calc: $now - $last_collected_t
-    every: 10s
-    units: seconds ago
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
  template: portcheck_service_reachable
diff --git a/health/health.d/postgres.conf b/health/health.d/postgres.conf
deleted file mode 100644
index f908a802a1..0000000000
--- a/health/health.d/postgres.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# make sure postgres is running
-
- template: postgres_last_collected_secs
-       on: postgres.db_stat_transactions
-    class: Database
-component: PostgreSQL
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
diff --git a/health/health.d/pulsar.conf b/health/health.d/pulsar.conf
deleted file mode 100644
index 9903d4e381..0000000000
--- a/health/health.d/pulsar.conf
+++ /dev/null
@@ -1,16 +0,0 @@
-
-# Availability
-
- template: pulsar_last_collected_secs
-       on: pulsar.broker_components
-    class: Messaging
-component: Pulsar
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
diff --git a/health/health.d/python.d.plugin.conf b/health/health.d/python.d.plugin.conf
new file mode 100644
index 0000000000..7a3ebe1d26
--- /dev/null
+++ b/health/health.d/python.d.plugin.conf
@@ -0,0 +1,17 @@
+
+# Alert when a python.d.plugin data collection job stops updating its runtime chart (any module).
+# WARNING after 5x and CRITICAL after 60x $update_every without data; 1x keeps an already raised status.
+ template: python.d_job_last_collected_secs
+       on: netdata.pythond_runtime
+    class: Netdata
+component: python.d.plugin
+     type: Error
+   module: *
+     calc: $now - $last_collected_t
+    units: seconds ago
+    every: 10s
+     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
+     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+    delay: down 5m multiplier 1.5 max 1h
+     info: number of seconds since the last successful data collection
+       to: webmaster
diff --git a/health/health.d/redis.conf b/health/health.d/redis.conf
index e8b2899427..d597f0be91 100644
--- a/health/health.d/redis.conf
+++ b/health/health.d/redis.conf
@@ -1,20 +1,4 @@
 
-# make sure redis is running
-
- template: redis_last_collected_secs
-       on: redis.operations
-    class: KV Storage
-component: Redis
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
-
  template: redis_bgsave_broken
  families: *
        on: redis.bgsave_health
diff --git a/health/health.d/retroshare.conf b/health/health.d/retroshare.conf
index ca22e60dea..6b3ab9dc3a 100644
--- a/health/health.d/retroshare.conf
+++ b/health/health.d/retroshare.conf
@@ -1,18 +1,3 @@
-# make sure RetroShare is running
-
- template: retroshare_last_collected_secs
-       on: retroshare.peers
-    class: Data Sharing
-component: Retroshare
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
 
 # make sure the DHT is fine when active
 
diff --git a/health/health.d/riakkv.conf b/health/health.d/riakkv.conf
index b2c0e8d9c3..b390840843 100644
--- a/health/health.d/riakkv.conf
+++ b/health/health.d/riakkv.conf
@@ -1,17 +1,3 @@
-# Ensure that Riak is running.  template: riak_last_collected_secs
- template: riakkv_last_collected_secs
-       on: riak.kv.throughput
-    class: Database
-component: Riak KV
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: dba
 
 # Warn if a list keys operation is running.
  template: riakkv_list_keys_active
diff --git a/health/health.d/scaleio.conf b/health/health.d/scaleio.conf
index 3c0dc1168f..894ee0d754 100644
--- a/health/health.d/scaleio.conf
+++ b/health/health.d/scaleio.conf
@@ -1,20 +1,4 @@
 
-# make sure scaleio is running
-
- template: scaleio_last_collected_secs
-       on: scaleio.system_capacity_total
-    class: Storage
-component: ScaleIO
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure Storage Pool capacity utilization is under limit
 
  template: scaleio_storage_pool_capacity_utilization
diff --git a/health/health.d/squid.conf b/health/health.d/squid.conf
deleted file mode 100644
index 5c3d176294..0000000000
--- a/health/health.d/squid.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure squid is running
-
- template: squid_last_collected_secs
-       on: squid.clients_requests
-    class: Web Proxy
-component: Squid
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: proxyadmin
-
diff --git a/health/health.d/unbound.conf b/health/health.d/unbound.conf
index 1df15474f0..cd9ffa4313 100644
--- a/health/health.d/unbound.conf
+++ b/health/health.d/unbound.conf
@@ -1,20 +1,4 @@
 
-# make sure unbound is running
-
- template: unbound_last_collected_secs
-       on: unbound.queries
-    class: DNS
-component: Unbound
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # make sure there is no overwritten/dropped queries in the request-list
 
  template: unbound_request_list_overwritten
diff --git a/health/health.d/vcsa.conf b/health/health.d/vcsa.conf
index 8538e488cb..42eb394c77 100644
--- a/health/health.d/vcsa.conf
+++ b/health/health.d/vcsa.conf
@@ -1,20 +1,4 @@
 
-# make sure vcsa is running and responding
-
- template: vcsa_last_collected_secs
-       on: vcsa.system_health
-    class: Virtual Machine
-component: VMware vCenter
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Overall system health:
 #  - 0: all components are healthy.
 #  - 1: one or more components might become overloaded soon.
diff --git a/health/health.d/vernemq.conf b/health/health.d/vernemq.conf
index 737147f38f..ce608e266d 100644
--- a/health/health.d/vernemq.conf
+++ b/health/health.d/vernemq.conf
@@ -1,20 +1,4 @@
 
-# Availability
-
- template: vernemq_last_collected_secs
-       on: vernemq.node_uptime
-    class: Messaging
-component: VerneMQ
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 # Socket errors
 
  template: vernemq_socket_errors
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index c1237fa06d..2cad70d040 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -1,22 +1,4 @@
 
-# make sure we can collect web log data
-
- template: last_collected_secs
-       on: web_log.response_codes
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
 # -----------------------------------------------------------------------------
 # high level response code alarms
 
@@ -231,23 +213,6 @@ component: Web log
 
 # ---------------------------------------------------GO-VERSION---------------------------------------------------------
 
-# make sure we can collect web log data
-
- template: web_log_last_collected_secs
-       on: web_log.requests
-    class: Web Server
-component: Web log
-     type: Latency
- families: *
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
 # unmatched lines
 
 # the following alarms trigger only when there are enough data.
diff --git a/health/health.d/whoisquery.conf b/health/health.d/whoisquery.conf
index c6d3a9de04..6666229463 100644
--- a/health/health.d/whoisquery.conf
+++ b/health/health.d/whoisquery.conf
@@ -1,21 +1,4 @@
 
-# make sure whoisquery is running
-
- template: whoisquery_last_collected_secs
-       on: whoisquery.time_until_expiration
-    class: Other
-component: WHOIS
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: whoisquery_days_until_expiration
        on: whoisquery.time_until_expiration
     class: Other
diff --git a/health/health.d/wmi.conf b/health/health.d/wmi.conf
index 6bd4e077f7..35a93c3938 100644
--- a/health/health.d/wmi.conf
+++ b/health/health.d/wmi.conf
@@ -1,22 +1,4 @@
 
-# you can disable an alarm notification by setting the 'to' line to: silent
-
-## Availability
-
- template: wmi_last_collected_secs
-       on: cpu.collector_duration
-    class: Windows
-component: Availability
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: sysadmin
-
 ## CPU
 
  template: wmi_10min_cpu_usage
diff --git a/health/health.d/x509check.conf b/health/health.d/x509check.conf
index 93c406b7a4..d97b694c86 100644
--- a/health/health.d/x509check.conf
+++ b/health/health.d/x509check.conf
@@ -1,21 +1,4 @@
 
-# make sure x509check is running
-
- template: x509check_last_collected_secs
-       on: x509check.time_until_expiration
-    class: Certificates
-component: x509 certificates
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 60s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-
  template: x509check_days_until_expiration
        on: x509check.time_until_expiration
     class: Certificates
diff --git a/health/health.d/zookeeper.conf b/health/health.d/zookeeper.conf
deleted file mode 100644
index 8c7d5a73d8..0000000000
--- a/health/health.d/zookeeper.conf
+++ /dev/null
@@ -1,17 +0,0 @@
-
-# make sure zookeeper is running
-
- template: zookeeper_last_collected_secs
-       on: zookeeper.requests
-    class: KV Storage
-component: ZooKeeper
-     type: Latency
-     calc: $now - $last_collected_t
-    units: seconds ago
-    every: 10s
-     warn: $this > (($status >= $WARNING)  ? ($update_every) : ( 5 * $update_every))
-     crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
-    delay: down 5m multiplier 1.5 max 1h
-     info: number of seconds since the last successful data collection
-       to: webmaster
-
-- 
cgit v1.2.3