summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ilya Mashchenko <ilya@netdata.cloud>, 2019-09-17 17:25:04 +0300
committer: Chris Akritidis <43294513+cakrit@users.noreply.github.com>, 2019-09-17 16:25:04 +0200
commit: 74074e815bc02c5d6b28d7f6f90b9872ee5ba5de (patch)
tree: 4b465a57d57005ea430e5487a7ae6160a9a7b3a0
parent: 1ac5ec5e11c168b55ab1905524b53cad20ab2518 (diff)
vcsa collector: charts description and alarms (#6772)
* add vcsa to dashboard_info.js
* add vcsa alarms
* update dashboard_info.js
* update dashboard_info.js
* update alarms
* availability alarm fix
-rw-r--r-- health/Makefile.am | 1
-rw-r--r-- health/health.d/vcsa.conf | 122
-rw-r--r-- web/gui/dashboard_info.js | 40
3 files changed, 162 insertions, 1 deletions
diff --git a/health/Makefile.am b/health/Makefile.am
index 383696ef64..a682b594c1 100644
--- a/health/Makefile.am
+++ b/health/Makefile.am
@@ -85,6 +85,7 @@ dist_healthconfig_DATA = \
health.d/tcp_resets.conf \
health.d/udp_errors.conf \
health.d/varnish.conf \
+ health.d/vcsa.conf \
health.d/vsphere.conf \
health.d/web_log.conf \
health.d/wmi.conf \
diff --git a/health/health.d/vcsa.conf b/health/health.d/vcsa.conf
new file mode 100644
index 0000000000..7bb98a9bad
--- /dev/null
+++ b/health/health.d/vcsa.conf
@@ -0,0 +1,122 @@
+
+# make sure vcsa is running and responding
+
+template: vcsa_last_collected_secs
+ on: vcsa.system_health
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# Overall system health:
+# - 0: all components are healthy.
+# - 1: one or more components might become overloaded soon.
+# - 2: one or more components in the appliance might be degraded.
+# - 3: one or more components might be in an unusable status and the appliance might become unresponsive soon.
+# - 4: no health data is available.
+
+template: vcsa_system_health
+ on: vcsa.system_health
+ lookup: max -10s unaligned of system
+ units: status
+ every: 10s
+ warn: ($this == 1) || ($this == 2)
+ crit: $this == 3
+ delay: down 1m multiplier 1.5 max 1h
+ info: overall system health status
+ to: sysadmin
+
+# Components health:
+# - 0: healthy.
+# - 1: healthy, but may have some problems.
+# - 2: degraded, and may have serious problems.
+# - 3: unavailable, or will stop functioning soon.
+# - 4: no health data is available.
+
+template: vcsa_swap_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of swap
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: swap health status
+ to: sysadmin
+
+template: vcsa_storage_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of storage
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: storage health status
+ to: sysadmin
+
+template: vcsa_mem_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of mem
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: mem health status
+ to: sysadmin
+
+template: vcsa_load_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of load
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: load health status
+ to: sysadmin
+
+template: vcsa_database_storage_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of database_storage
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: database storage health status
+ to: sysadmin
+
+template: vcsa_applmgmt_health
+ on: vcsa.components_health
+ lookup: max -10s unaligned of applmgmt
+ units: status
+ every: 10s
+ warn: $this == 1
+ crit: ($this == 2) || ($this == 3)
+ delay: down 1m multiplier 1.5 max 1h
+ info: appl mgmt health status
+ to: sysadmin
+
+
+# Software updates health:
+# - 0: no updates available.
+# - 2: non-security updates are available.
+# - 3: security updates are available.
+# - 4: an error occurred while retrieving information on software updates.
+
+template: vcsa_software_updates_health
+ on: vcsa.software_updates_health
+ lookup: max -10s unaligned of software_packages
+ units: status
+ every: 10s
+ warn: $this == 4
+ crit: $this == 3
+ delay: down 1m multiplier 1.5 max 1h
+ info: software packages health status
+ to: sysadmin
diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js
index 661d40fd8e..3f65718164 100644
--- a/web/gui/dashboard_info.js
+++ b/web/gui/dashboard_info.js
@@ -498,6 +498,12 @@ netdataDashboard.menu = {
title: 'vSphere',
icon: '<i class="fas fa-server"></i>',
info: 'Performance statistics for ESXI hosts and virtual machines. Data collected from <a href="https://www.vmware.com/products/vcenter-server.html">VMware vCenter Server</a> using <code><a href="https://github.com/vmware/govmomi"> govmomi</a></code> library.'
+ },
+
+ 'vcsa': {
+ title: 'VCSA',
+ icon: '<i class="fas fa-server"></i>',
+ info: 'vCenter Server Appliance health statistics. Data collected from <a href="https://vmware.github.io/vsphere-automation-sdk-rest/vsphere/index.html#SVC_com.vmware.appliance.health">Health API</a>.'
}
};
@@ -2587,5 +2593,37 @@ netdataDashboard.context = {
'vsphere.overall_status': {
info: '<code>0</code> is unknown, <code>1</code> is OK, <code>2</code> is might have a problem, <code>3</code> is definitely has a problem.'
- }
+ },
+
+ // ------------------------------------------------------------------------
+ // VCSA
+ 'vcsa.system_health': {
+ info:
+ '<code>-1</code>: unknown; ' +
+ '<code>0</code>: all components are healthy; ' +
+ '<code>1</code>: one or more components might become overloaded soon; ' +
+ '<code>2</code>: one or more components in the appliance might be degraded; ' +
+ '<code>3</code>: one or more components might be in an unusable status and the appliance might become unresponsive soon; ' +
+ '<code>4</code>: no health data is available.'
+ },
+
+ 'vcsa.components_health': {
+ info:
+ '<code>-1</code>: unknown; ' +
+ '<code>0</code>: healthy; ' +
+ '<code>1</code>: healthy, but may have some problems; ' +
+ '<code>2</code>: degraded, and may have serious problems; ' +
+ '<code>3</code>: unavailable, or will stop functioning soon; ' +
+ '<code>4</code>: no health data is available.'
+ },
+
+ 'vcsa.software_updates_health': {
+ info:
+ '<code>softwarepackages</code> represents information on available software updates available in the remote vSphere Update Manager repository.<br>' +
+ '<code>-1</code>: unknown; ' +
+ '<code>0</code>: no updates available; ' +
+ '<code>2</code>: non-security updates are available; ' +
+ '<code>3</code>: security updates are available; ' +
+ '<code>4</code>: an error retrieving information on software updates.'
+ }
};