summaryrefslogtreecommitdiffstats
path: root/health
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@tsaousis.gr>2018-10-15 23:16:42 +0300
committerGitHub <noreply@github.com>2018-10-15 23:16:42 +0300
commit8fbf817ef83b3524b15f908251909d9d6feb5532 (patch)
tree4c2d417b7392c907bbdbe355b8db361bd3741a02 /health
parent1ad4f1bcfc691120102b57dbd426de0870abd76f (diff)
modularized all source code (#4391)
* modularized all external plugins * added README.md in plugins * fixed title * fixed typo * relative link to external plugins * external plugins configuration README * added plugins link * remove plugins link * plugin names are links * added links to external plugins * removed unecessary spacing * list to table * added language * fixed typo * list to table on internal plugins * added more documentation to internal plugins * moved python, node, and bash code and configs into the external plugins * added statsd README * fix bug with corrupting config.h every 2nd compilation * moved all config files together with their code * more documentation * diskspace info * fixed broken links in apps.plugin * added backends docs * updated plugins readme * move nc-backend.sh to backends * created daemon directory * moved all code outside src/ * fixed readme identation * renamed plugins.d.plugin to plugins.d * updated readme * removed linux- from linux plugins * updated readme * updated readme * updated readme * updated readme * updated readme * updated readme * fixed README.md links * fixed netdata tree links * updated codacy, codeclimate and lgtm excluded paths * update CMakeLists.txt * updated automake options at top directory * libnetdata slit into directories * updated READMEs * updated READMEs * updated ARL docs * updated ARL docs * moved /plugins to /collectors * moved all external plugins outside plugins.d * updated codacy, codeclimate, lgtm * updated README * updated url * updated readme * updated readme * updated readme * updated readme * moved api and web into webserver * web/api web/gui web/server * modularized webserver * removed web/gui/version.txt
Diffstat (limited to 'health')
-rw-r--r--health/Makefile.am87
-rw-r--r--health/README.md0
-rwxr-xr-xhealth/alarm-email.sh7
-rwxr-xr-xhealth/alarm-notify.sh.in2378
-rwxr-xr-xhealth/alarm-test.sh12
-rw-r--r--health/health.c751
-rw-r--r--health/health.d/apache.conf14
-rw-r--r--health/health.d/apcupsd.conf40
-rw-r--r--health/health.d/backend.conf45
-rw-r--r--health/health.d/bcache.conf22
-rw-r--r--health/health.d/beanstalkd.conf36
-rw-r--r--health/health.d/bind_rndc.conf9
-rw-r--r--health/health.d/boinc.conf62
-rw-r--r--health/health.d/btrfs.conf57
-rw-r--r--health/health.d/ceph.conf13
-rw-r--r--health/health.d/couchdb.conf13
-rw-r--r--health/health.d/cpu.conf55
-rw-r--r--health/health.d/disks.conf167
-rw-r--r--health/health.d/dockerd.conf8
-rw-r--r--health/health.d/elasticsearch.conf9
-rw-r--r--health/health.d/entropy.conf16
-rw-r--r--health/health.d/fping.conf53
-rw-r--r--health/health.d/fronius.conf11
-rw-r--r--health/health.d/haproxy.conf27
-rw-r--r--health/health.d/httpcheck.conf99
-rw-r--r--health/health.d/ipc.conf28
-rw-r--r--health/health.d/ipfs.conf11
-rw-r--r--health/health.d/ipmi.conf20
-rw-r--r--health/health.d/isc_dhcpd.conf10
-rw-r--r--health/health.d/lighttpd.conf14
-rw-r--r--health/health.d/linux_power_supply.conf12
-rw-r--r--health/health.d/load.conf56
-rw-r--r--health/health.d/mdstat.conf27
-rw-r--r--health/health.d/megacli.conf48
-rw-r--r--health/health.d/memcached.conf52
-rw-r--r--health/health.d/memory.conf38
-rw-r--r--health/health.d/mongodb.conf13
-rw-r--r--health/health.d/mysql.conf100
-rw-r--r--health/health.d/named.conf14
-rw-r--r--health/health.d/net.conf122
-rw-r--r--health/health.d/netfilter.conf29
-rw-r--r--health/health.d/nginx.conf14
-rw-r--r--health/health.d/nginx_plus.conf14
-rw-r--r--health/health.d/portcheck.conf48
-rw-r--r--health/health.d/postgres.conf13
-rw-r--r--health/health.d/qos.conf18
-rw-r--r--health/health.d/ram.conf64
-rw-r--r--health/health.d/redis.conf34
-rw-r--r--health/health.d/retroshare.conf25
-rw-r--r--health/health.d/softnet.conf40
-rw-r--r--health/health.d/squid.conf14
-rw-r--r--health/health.d/stiebeleltron.conf11
-rw-r--r--health/health.d/swap.conf43
-rw-r--r--health/health.d/tcp_conn.conf19
-rw-r--r--health/health.d/tcp_listen.conf82
-rw-r--r--health/health.d/tcp_mem.conf20
-rw-r--r--health/health.d/tcp_orphans.conf21
-rw-r--r--health/health.d/tcp_resets.conf67
-rw-r--r--health/health.d/udp_errors.conf49
-rw-r--r--health/health.d/varnish.conf9
-rw-r--r--health/health.d/web_log.conf163
-rw-r--r--health/health.d/zfs.conf10
-rw-r--r--health/health.h76
-rwxr-xr-xhealth/health_alarm_notify.conf961
-rw-r--r--health/health_config.c859
-rw-r--r--health/health_email_recipients.conf2
-rw-r--r--health/health_json.c262
-rw-r--r--health/health_log.c465
68 files changed, 7988 insertions, 0 deletions
diff --git a/health/Makefile.am b/health/Makefile.am
new file mode 100644
index 0000000000..6f09b2e25f
--- /dev/null
+++ b/health/Makefile.am
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+CLEANFILES = \
+ alarm-notify.sh \
+ $(NULL)
+
+include $(top_srcdir)/build/subst.inc
+SUFFIXES = .in
+
+dist_libconfig_DATA = \
+ health_alarm_notify.conf \
+ health_email_recipients.conf \
+ $(NULL)
+
+# alarm-notify.sh is listed in CLEANFILES above and has an alarm-notify.sh.in
+# template, i.e. it is a build product: install it, but do not ship the
+# generated copy in the distribution tarball.
+nodist_plugins_SCRIPTS = \
+    alarm-notify.sh \
+    $(NULL)
+
+# hand-written scripts: distributed and installed as they are
+dist_plugins_SCRIPTS = \
+    alarm-email.sh \
+    alarm-test.sh \
+    $(NULL)
+
+dist_noinst_DATA = \
+ alarm-notify.sh.in \
+ README.md \
+ $(NULL)
+
+healthconfigdir=$(libconfigdir)/health.d
+dist_healthconfig_DATA = \
+ health.d/apache.conf \
+ health.d/apcupsd.conf \
+ health.d/backend.conf \
+ health.d/bcache.conf \
+ health.d/beanstalkd.conf \
+ health.d/bind_rndc.conf \
+ health.d/boinc.conf \
+ health.d/btrfs.conf \
+ health.d/ceph.conf \
+ health.d/cpu.conf \
+ health.d/couchdb.conf \
+ health.d/disks.conf \
+ health.d/dockerd.conf \
+ health.d/elasticsearch.conf \
+ health.d/entropy.conf \
+ health.d/fping.conf \
+ health.d/fronius.conf \
+ health.d/haproxy.conf \
+ health.d/httpcheck.conf \
+ health.d/ipc.conf \
+ health.d/ipfs.conf \
+ health.d/ipmi.conf \
+ health.d/isc_dhcpd.conf \
+ health.d/lighttpd.conf \
+ health.d/linux_power_supply.conf \
+ health.d/load.conf \
+ health.d/mdstat.conf \
+ health.d/megacli.conf \
+ health.d/memcached.conf \
+ health.d/memory.conf \
+ health.d/mongodb.conf \
+ health.d/mysql.conf \
+ health.d/named.conf \
+ health.d/net.conf \
+ health.d/netfilter.conf \
+ health.d/nginx.conf \
+ health.d/nginx_plus.conf \
+ health.d/portcheck.conf \
+ health.d/postgres.conf \
+ health.d/qos.conf \
+ health.d/ram.conf \
+ health.d/redis.conf \
+ health.d/retroshare.conf \
+ health.d/softnet.conf \
+ health.d/squid.conf \
+ health.d/stiebeleltron.conf \
+ health.d/swap.conf \
+ health.d/tcp_conn.conf \
+ health.d/tcp_listen.conf \
+ health.d/tcp_mem.conf \
+ health.d/tcp_orphans.conf \
+ health.d/tcp_resets.conf \
+ health.d/udp_errors.conf \
+ health.d/varnish.conf \
+ health.d/web_log.conf \
+ health.d/zfs.conf \
+ $(NULL)
diff --git a/health/README.md b/health/README.md
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/health/README.md
diff --git a/health/alarm-email.sh b/health/alarm-email.sh
new file mode 100755
index 0000000000..69c4c3f8df
--- /dev/null
+++ b/health/alarm-email.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# OBSOLETE - replaced by alarm-notify.sh; this wrapper only forwards to it.
+# The rewritten path is quoted so install paths with spaces or globs work.
+
+"${0/alarm-email.sh/alarm-notify.sh}" "${@}"
diff --git a/health/alarm-notify.sh.in b/health/alarm-notify.sh.in
new file mode 100755
index 0000000000..4aef3a521a
--- /dev/null
+++ b/health/alarm-notify.sh.in
@@ -0,0 +1,2378 @@
+#!/usr/bin/env bash
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# Script to send alarm notifications for netdata
+#
+# Features:
+# - multiple notification methods
+# - multiple roles per alarm
+# - multiple recipients per role
+# - severity filtering per recipient
+#
+# Supported notification methods:
+# - emails by @ktsaou
+# - slack.com notifications by @ktsaou
+# - alerta.io notifications by @kattunga
+# - discordapp.com notifications by @lowfive
+# - pushover.net notifications by @ktsaou
+# - pushbullet.com push notifications by Tiago Peralta @tperalta82 #1070
+# - telegram.org notifications by @hashworks #1002
+# - twilio.com notifications by Levi Blaney @shadycuz #1211
+# - kafka notifications by @ktsaou #1342
+# - pagerduty.com notifications by Jim Cooley @jimcooley #1373
+# - messagebird.com notifications by @tech_no_logical #1453
+# - hipchat notifications by @ktsaou #1561
+# - fleep notifications by @Ferroin
+# - custom notifications by @ktsaou
+# - syslog messages by @Ferroin
+# - Microsoft Team notification by @tioumen
+
+# -----------------------------------------------------------------------------
+# testing notifications
+
+
+if [ \( "${1}" = "test" -o "${2}" = "test" \) -a "${#}" -le 2 ]
+then
+ if [ "${2}" = "test" ]
+ then
+ recipient="${1}"
+ else
+ recipient="${2}"
+ fi
+
+ [ -z "${recipient}" ] && recipient="sysadmin"
+
+ id=1
+ last="CLEAR"
+ test_res=0
+ for x in "WARNING" "CRITICAL" "CLEAR"
+ do
+ echo >&2
+ echo >&2 "# SENDING TEST ${x} ALARM TO ROLE: ${recipient}"
+
+ "${0}" "${recipient}" "$(hostname)" 1 1 "${id}" "$(date +%s)" "test_alarm" "test.chart" "test.family" "${x}" "${last}" 100 90 "${0}" 1 $((0 + id)) "units" "this is a test alarm to verify notifications work" "new value" "old value"
+ if [ $? -ne 0 ]
+ then
+ echo >&2 "# FAILED"
+ test_res=1
+ else
+ echo >&2 "# OK"
+ fi
+
+ last="${x}"
+ id=$((id + 1))
+ done
+
+ exit $test_res
+fi
+
+export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
+export LC_ALL=C
+
+# -----------------------------------------------------------------------------
+
+# name of this script, used as a prefix on every log line
+PROGRAM_NAME="$(basename "${0}")"
+
+# print the current time as "YYYY-MM-DD HH:MM:SS"
+logdate() {
+    date "+%Y-%m-%d %H:%M:%S"
+}
+
+# log LEVEL MESSAGE...
+# write "<timestamp>: <program>: <LEVEL>: <message>" to stderr
+log() {
+    local level="${1}"
+    shift
+
+    printf >&2 '%s: %s: %s: %s\n' "$(logdate)" "${PROGRAM_NAME}" "${level}" "${*}"
+}
+
+# one thin wrapper per severity
+warning() { log WARNING "${@}"; }
+error()   { log ERROR "${@}"; }
+info()    { log INFO "${@}"; }
+
+# log the message and terminate the script with status 1
+fatal() {
+    log FATAL "${@}"
+    exit 1
+}
+
+# debug messages are printed only when NETDATA_ALARM_NOTIFY_DEBUG is 1
+debug=${NETDATA_ALARM_NOTIFY_DEBUG-0}
+debug() {
+    [[ "${debug}" == "1" ]] && log DEBUG "${@}"
+}
+
+docurl() {
+ if [ -z "${curl}" ]
+ then
+ error "\${curl} is unset."
+ return 1
+ fi
+
+ if [ "${debug}" = "1" ]
+ then
+ echo >&2 "--- BEGIN curl command ---"
+ printf >&2 "%q " ${curl} "${@}"
+ echo >&2
+ echo >&2 "--- END curl command ---"
+
+ local out=$(mktemp /tmp/netdata-health-alarm-notify-XXXXXXXX)
+ local code=$(${curl} ${curl_options} --write-out %{http_code} --output "${out}" --silent --show-error "${@}")
+ local ret=$?
+ echo >&2 "--- BEGIN received response ---"
+ cat >&2 "${out}"
+ echo >&2
+ echo >&2 "--- END received response ---"
+ echo >&2 "RECEIVED HTTP RESPONSE CODE: ${code}"
+ rm "${out}"
+ echo "${code}"
+ return ${ret}
+ fi
+
+ ${curl} ${curl_options} --write-out %{http_code} --output /dev/null --silent --show-error "${@}"
+ return $?
+}
+
+# -----------------------------------------------------------------------------
+# this is to be overwritten by the config file
+
+custom_sender() {
+ info "not sending custom notification for ${status} of '${host}.${chart}.${name}'"
+}
+
+
+# -----------------------------------------------------------------------------
+
+# the associative arrays declared further down need BASH v4 or later
+if (( BASH_VERSINFO[0] < 4 ))
+then
+    fatal "BASH version 4 or later is required (this is ${BASH_VERSION})."
+fi
+
+# -----------------------------------------------------------------------------
+# defaults to allow running this script by hand
+
+[ -z "${NETDATA_USER_CONFIG_DIR}" ] && NETDATA_USER_CONFIG_DIR="@configdir_POST@"
+[ -z "${NETDATA_STOCK_CONFIG_DIR}" ] && NETDATA_STOCK_CONFIG_DIR="@libconfigdir_POST@"
+[ -z "${NETDATA_CACHE_DIR}" ] && NETDATA_CACHE_DIR="@cachedir_POST@"
+[ -z "${NETDATA_REGISTRY_URL}" ] && NETDATA_REGISTRY_URL="https://registry.my-netdata.io"
+
+# -----------------------------------------------------------------------------
+# parse command line parameters
+
+roles="${1}" # the roles that should be notified for this event
+host="${2}" # the host generated this event
+unique_id="${3}" # the unique id of this event
+alarm_id="${4}" # the unique id of the alarm that generated this event
+event_id="${5}" # the incremental id of the event, for this alarm id
+when="${6}" # the timestamp this event occurred
+name="${7}" # the name of the alarm, as given in netdata health.d entries
+chart="${8}" # the name of the chart (type.id)
+family="${9}" # the family of the chart
+status="${10}" # the current status : REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
+old_status="${11}" # the previous status: REMOVED, UNINITIALIZED, UNDEFINED, CLEAR, WARNING, CRITICAL
+value="${12}" # the current value of the alarm
+old_value="${13}" # the previous value of the alarm
+src="${14}" # the line number and file the alarm has been configured
+duration="${15}" # the duration in seconds of the previous alarm state
+non_clear_duration="${16}" # the total duration in seconds this is/was non-clear
+units="${17}" # the units of the value
+info="${18}" # a short description of the alarm
+value_string="${19}" # friendly value (with units)
+old_value_string="${20}" # friendly old value (with units)
+
+# -----------------------------------------------------------------------------
+# find a suitable hostname to use, if netdata did not supply a hostname
+
+this_host=$(hostname -s 2>/dev/null)
+[ -z "${host}" ] && host="${this_host}"
+
+# -----------------------------------------------------------------------------
+# screen statuses we don't need to send a notification
+#
+# - WARNING and CRITICAL are always notified
+# - CLEAR is notified only when the previous status was WARNING or CRITICAL
+# - every other status is logged and dropped (exit 1)
+
+case "${status}" in
+    WARNING|CRITICAL)
+        ;;
+
+    CLEAR)
+        case "${old_status}" in
+            WARNING|CRITICAL)
+                ;;
+            *)
+                info "not sending notification for ${status} of '${host}.${chart}.${name}' (last status was ${old_status})"
+                exit 1
+                ;;
+        esac
+        ;;
+
+    *)
+        info "not sending notification for ${status} of '${host}.${chart}.${name}'"
+        exit 1
+        ;;
+esac
+
+# -----------------------------------------------------------------------------
+# load configuration
+
+# By default fetch images from the global public registry.
+# This is required by default, since all notification methods need to download
+# images via the Internet, and private registries might not be reachable.
+# This can be overwritten at the configuration file.
+images_base_url="https://registry.my-netdata.io"
+
+# curl options to use
+curl_options=""
+
+# needed commands
+# if empty they will be searched in the system path
+curl=
+sendmail=
+
+# enable / disable features
+SEND_SLACK="YES"
+SEND_MSTEAM="YES"
+SEND_ALERTA="YES"
+SEND_FLOCK="YES"
+SEND_DISCORD="YES"
+SEND_PUSHOVER="YES"
+SEND_TWILIO="YES"
+SEND_HIPCHAT="YES"
+SEND_MESSAGEBIRD="YES"
+SEND_KAVENEGAR="YES"
+SEND_TELEGRAM="YES"
+SEND_EMAIL="YES"
+SEND_PUSHBULLET="YES"
+SEND_KAFKA="YES"
+SEND_PD="YES"
+SEND_FLEEP="YES"
+SEND_IRC="YES"
+SEND_AWSSNS="YES"
+SEND_SYSLOG="NO"
+SEND_CUSTOM="YES"
+
+# slack configs
+SLACK_WEBHOOK_URL=
+DEFAULT_RECIPIENT_SLACK=
+declare -A role_recipients_slack=()
+
+# Microsoft Team configs
+MSTEAM_WEBHOOK_URL=
+DEFAULT_RECIPIENT_MSTEAM=
+declare -A role_recipients_msteam=()
+
+# rocketchat configs
+ROCKETCHAT_WEBHOOK_URL=
+DEFAULT_RECIPIENT_ROCKETCHAT=
+declare -A role_recipients_rocketchat=()
+
+# alerta configs
+ALERTA_WEBHOOK_URL=
+ALERTA_API_KEY=
+DEFAULT_RECIPIENT_ALERTA=
+declare -A role_recipients_alerta=()
+
+# flock configs
+FLOCK_WEBHOOK_URL=
+DEFAULT_RECIPIENT_FLOCK=
+declare -A role_recipients_flock=()
+
+# discord configs
+DISCORD_WEBHOOK_URL=
+DEFAULT_RECIPIENT_DISCORD=
+declare -A role_recipients_discord=()
+
+# pushover configs
+PUSHOVER_APP_TOKEN=
+DEFAULT_RECIPIENT_PUSHOVER=
+declare -A role_recipients_pushover=()
+
+# pushbullet configs
+PUSHBULLET_ACCESS_TOKEN=
+PUSHBULLET_SOURCE_DEVICE=
+DEFAULT_RECIPIENT_PUSHBULLET=
+declare -A role_recipients_pushbullet=()
+
+# twilio configs
+TWILIO_ACCOUNT_SID=
+TWILIO_ACCOUNT_TOKEN=
+TWILIO_NUMBER=
+DEFAULT_RECIPIENT_TWILIO=
+declare -A role_recipients_twilio=()
+
+# hipchat configs
+HIPCHAT_SERVER=
+HIPCHAT_AUTH_TOKEN=
+DEFAULT_RECIPIENT_HIPCHAT=
+declare -A role_recipients_hipchat=()
+
+# messagebird configs
+MESSAGEBIRD_ACCESS_KEY=
+MESSAGEBIRD_NUMBER=
+DEFAULT_RECIPIENT_MESSAGEBIRD=
+declare -A role_recipients_messagebird=()
+
+# kavenegar configs
+KAVENEGAR_API_KEY=""
+KAVENEGAR_SENDER=""
+DEFAULT_RECIPIENT_KAVENEGAR=()
+declare -A role_recipients_kavenegar=""
+
+# telegram configs
+TELEGRAM_BOT_TOKEN=
+DEFAULT_RECIPIENT_TELEGRAM=
+declare -A role_recipients_telegram=()
+
+# kafka configs
+KAFKA_URL=
+KAFKA_SENDER_IP=
+
+# pagerduty.com configs
+PD_SERVICE_KEY=
+DEFAULT_RECIPIENT_PD=
+declare -A role_recipients_pd=()
+
+# fleep.io configs
+FLEEP_SENDER="${host}"
+DEFAULT_RECIPIENT_FLEEP=
+declare -A role_recipients_fleep=()
+
+# Amazon SNS configs
+DEFAULT_RECIPIENT_AWSSNS=
+AWSSNS_MESSAGE_FORMAT=
+declare -A role_recipients_awssns=()
+
+# syslog configs
+SYSLOG_FACILITY=
+declare -A role_recipients_syslog=()
+
+# custom configs
+DEFAULT_RECIPIENT_CUSTOM=
+declare -A role_recipients_custom=()
+
+# email configs
+EMAIL_SENDER=
+DEFAULT_RECIPIENT_EMAIL="root"
+EMAIL_CHARSET=$(locale charmap 2>/dev/null)
+EMAIL_THREADING=
+declare -A role_recipients_email=()
+
+# irc configs
+IRC_NICKNAME=
+IRC_REALNAME=
+DEFAULT_RECIPIENT_IRC=
+IRC_NETWORK=
+declare -A role_recipients_irc=()
+
+# load the stock configuration first and the user configuration second, so
+# that user settings win; both overwrite the defaults assigned above
+
+for CONFIG in "${NETDATA_STOCK_CONFIG_DIR}/health_alarm_notify.conf" "${NETDATA_USER_CONFIG_DIR}/health_alarm_notify.conf"
+do
+    # a missing file is only reported, it does not stop the script
+    if [ ! -f "${CONFIG}" ]
+    then
+        warning "Cannot find file '${CONFIG}'."
+        continue
+    fi
+
+    debug "Loading config file '${CONFIG}'..."
+    source "${CONFIG}"
+    [ $? -eq 0 ] || error "Failed to load config file '${CONFIG}'."
+done
+
+# If we didn't autodetect the character set for e-mail and it wasn't
+# set by the user, we need to set it to a reasonable default. UTF-8
+# should be correct for almost all modern UNIX systems.
+# The expansion is quoted so a value with whitespace or glob characters
+# cannot turn the test into a malformed expression.
+if [ -z "${EMAIL_CHARSET}" ]
+then
+    EMAIL_CHARSET="UTF-8"
+fi
+
+# -----------------------------------------------------------------------------
+# filter a recipient based on alarm event severity
+
+filter_recipient_by_criticality() {
+ local method="${1}" x="${2}" r s
+ shift
+
+ r="${x/|*/}" # the recipient
+ s="${x/*|/}" # the severity required for notifying this recipient
+
+ # no severity filtering for this person
+ [ "${r}" = "${s}" ] && return 0
+
+ # the severity is invalid
+ s="${s^^}"
+ if [ "${s}" != "CRITICAL" ]
+ then