summaryrefslogtreecommitdiffstats
path: root/health
diff options
context:
space:
mode:
author: thiagoftsm <thiagoftsm@gmail.com> 2020-10-28 13:48:53 +0000
committer: GitHub <noreply@github.com> 2020-10-28 13:48:53 +0000
commit: 1a50899551674e0f0fa684daeadd043503be949b (patch)
tree: 8032cbeca41f5a23ae6f7f8197e09fcb5b015ed6 /health
parent: 93f94c003ef6d494e6beb05a1e6523f2a2cf1a00 (diff)
Opsgenie integration (#9879)
Bring full integration with Opsgenie.
Diffstat (limited to 'health')
-rw-r--r-- health/notifications/Makefile.am 1
-rwxr-xr-x health/notifications/alarm-notify.sh.in 81
-rwxr-xr-x health/notifications/health_alarm_notify.conf 21
-rw-r--r-- health/notifications/opsgenie/Makefile.inc 12
-rw-r--r-- health/notifications/opsgenie/README.md 59
5 files changed, 168 insertions, 6 deletions
diff --git a/health/notifications/Makefile.am b/health/notifications/Makefile.am
index 4af9f7782e..e6b42138e0 100644
--- a/health/notifications/Makefile.am
+++ b/health/notifications/Makefile.am
@@ -35,6 +35,7 @@ include hangouts/Makefile.inc
include irc/Makefile.inc
include kavenegar/Makefile.inc
include messagebird/Makefile.inc
+include opsgenie/Makefile.inc
include pagerduty/Makefile.inc
include pushbullet/Makefile.inc
include pushover/Makefile.inc
diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in
index 78641f07d0..097555f42c 100755
--- a/health/notifications/alarm-notify.sh.in
+++ b/health/notifications/alarm-notify.sh.in
@@ -37,6 +37,7 @@
# - Google Hangouts Chat notifications by @EnzoAkira and @hendrikhofstadt
# - Dynatrace Event by @illumine
# - Stackpulse Event by @thiagoftsm
+# - Opsgenie by @thiagoftsm #9858
# -----------------------------------------------------------------------------
# testing notifications
@@ -386,6 +387,9 @@ SEND_DYNATRACE=
# stackpulse configs
STACKPULSE_WEBHOOK=
+# opsgenie configs
+OPSGENIE_API_KEY=
+
# load the stock and user configuration files
# these will overwrite the variables above
@@ -532,6 +536,9 @@ filter_recipient_by_criticality() {
[ -z "${DYNATRACE_TAG_VALUE}" ] ||
[ -z "${DYNATRACE_EVENT}" ]; } && SEND_DYNATRACE="NO"
+# check opsgenie
+[ -z "${OPSGENIE_API_KEY}" ] && SEND_OPSGENIE="NO"
+
# check matrix
{ [ -z "${MATRIX_HOMESERVER}" ] || [ -z "${MATRIX_ACCESSTOKEN}" ]; } && SEND_MATRIX="NO"
@@ -559,7 +566,8 @@ if [ "${SEND_PUSHOVER}" = "YES" ] ||
[ "${SEND_CUSTOM}" = "YES" ] ||
[ "${SEND_MSTEAM}" = "YES" ] ||
[ "${SEND_DYNATRACE}" = "YES" ] ||
- [ "${SEND_STACKPULSE}" = "YES" ]; then
+ [ "${SEND_STACKPULSE}" = "YES" ] ||
+ [ "${SEND_OPSGENIE}" = "YES" ]; then
# if we need curl, check for the curl command
if [ -z "${curl}" ]; then
curl="$(command -v curl 2>/dev/null)"
@@ -588,6 +596,7 @@ if [ "${SEND_PUSHOVER}" = "YES" ] ||
SEND_CUSTOM="NO"
SEND_DYNATRACE="NO"
SEND_STACKPULSE="NO"
+ SEND_OPSGENIE="NO"
fi
fi
@@ -717,8 +726,9 @@ for method in "${SEND_EMAIL}" \
"${SEND_SMS}" \
"${SEND_MSTEAM}" \
"${SEND_DYNATRACE}" \
- "${SEND_STACKPULSE}" ; do
-
+ "${SEND_STACKPULSE}" \
+ "${SEND_OPSGENIE}" ; do
+
if [ "${method}" == "YES" ]; then
proceed=1
break
@@ -2073,6 +2083,60 @@ EOF
return 0
}
# -----------------------------------------------------------------------------
+# Opsgenie sender
+
+send_opsgenie() {
+ local payload httpcode oldv currv
+ [ "${SEND_OPSGENIE}" != "YES" ] && return 1
+
+ if [ -z "${OPSGENIE_API_KEY}" ] ; then
+ info "Can't send Opsgenie notification, because OPSGENIE_API_KEY is not defined"
+ return 1
+ fi
+
+ # JSON has no representation for NaN, so send null when a value is nan to keep the payload parseable
+ [ "${old_value}" != "nan" ] && oldv="${old_value}" || oldv="null"
+ [ "${value}" != "nan" ] && currv="${value}" || currv="null"
+
+ payload=$(cat <<EOF
+ {
+ "host" : "${host}",
+ "unique_id" : "${unique_id}",
+ "alarmId" : ${alarm_id},
+ "eventId" : ${event_id},
+ "chart" : "${chart}",
+ "when": ${when},
+ "name" : "${name}",
+ "family" : "${family}",
+ "status" : "${status}",
+ "old_status" : "${old_status}",
+ "value" : ${currv},
+ "old_value" : ${oldv},
+ "duration": ${duration},
+ "non_clear_duration": ${non_clear_duration},
+ "units" : "${units}",
+ "info" : "${status_message}, ${info}",
+ "calc_expression" : "${calc_expression}",
+ "total_warnings" : "${total_warnings}",
+ "total_critical" : "${total_critical}",
+ "src" : "${src}"
+ }
+EOF
+)
+
+ httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "https://api.opsgenie.com/v1/json/integrations/webhooks/netdata?apiKey=${OPSGENIE_API_KEY}")
+ # https://docs.opsgenie.com/docs/alert-api#create-alert
+ if [ "${httpcode}" = "200" ]; then
+ info "sent opsgenie notification for: ${host} ${chart}.${name} is ${status}"
+ else
+ error "failed to send opsgenie notification for: ${host} ${chart}.${name} is ${status}, with HTTP error code ${httpcode}."
+ return 1
+ fi
+
+ return 0
+}
+
+# -----------------------------------------------------------------------------
# prepare the content of the notification
# the url to send the user on click
@@ -2589,13 +2653,17 @@ SENT_EMAIL=$?
send_dynatrace "${host}" "${chart}" "${name}" "${status}"
SENT_DYNATRACE=$?
-
# -----------------------------------------------------------------------------
-# send the EVENT to Dynatrace
+# send the EVENT to Stackpulse
send_stackpulse
SENT_STACKPULSE=$?
# -----------------------------------------------------------------------------
+# send messages to Opsgenie
+send_opsgenie
+SENT_OPSGENIE=$?
+
+# -----------------------------------------------------------------------------
# let netdata know
for state in "${SENT_EMAIL}" \
"${SENT_PUSHOVER}" \
@@ -2623,7 +2691,8 @@ for state in "${SENT_EMAIL}" \
"${SENT_SMS}" \
"${SENT_MSTEAM}" \
"${SENT_DYNATRACE}" \
- "${SENT_STACKPULSE}" ; do
+ "${SENT_STACKPULSE}" \
+ "${SENT_OPSGENIE}"; do
if [ "${state}" -eq 0 ]; then
# we sent something
exit 0
diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf
index 5615683452..4ebd9f2542 100755
--- a/health/notifications/health_alarm_notify.conf
+++ b/health/notifications/health_alarm_notify.conf
@@ -279,6 +279,15 @@ STACKPULSE_WEBHOOK=""
DEFAULT_RECIPIENT_STACKPULSE=""
#------------------------------------------------------------------------------
+# opsgenie global notification options
+SEND_OPSGENIE="YES"
+
+# API key
+OPSGENIE_API_KEY=""
+
+DEFAULT_RECIPIENT_OPSGENIE=""
+
+#------------------------------------------------------------------------------
# hangouts (google hangouts chat) global notification options
# enable/disable sending hangouts notifications
@@ -944,6 +953,8 @@ role_recipients_rocketchat[sysadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}"
role_recipients_dynatrace[sysadmin]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[sysadmin]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[sysadmin]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[sysadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
@@ -999,6 +1010,8 @@ role_recipients_sms[domainadmin]="${DEFAULT_RECIPIENT_SMS}"
role_recipients_dynatrace[domainadmin]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[domainadmin]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[domainadmin]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[domainadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
@@ -1055,6 +1068,8 @@ role_recipients_sms[dba]="${DEFAULT_RECIPIENT_SMS}"
role_recipients_dynatrace[dba]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[dba]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[dba]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[dba]="${DEFAULT_RECIPIENT_STACKPULSE}"
@@ -1111,6 +1126,8 @@ role_recipients_sms[webmaster]="${DEFAULT_RECIPIENT_SMS}"
role_recipients_dynatrace[webmaster]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[webmaster]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[webmaster]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[webmaster]="${DEFAULT_RECIPIENT_STACKPULSE}"
@@ -1167,6 +1184,8 @@ role_recipients_sms[proxyadmin]="${DEFAULT_RECIPIENT_SMS}"
role_recipients_dynatrace[proxyadmin]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[proxyadmin]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[proxyadmin]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[proxyadmin]="${DEFAULT_RECIPIENT_STACKPULSE}"
@@ -1221,6 +1240,8 @@ role_recipients_sms[sitemgr]="${DEFAULT_RECIPIENT_SMS}"
role_recipients_dynatrace[sitemgr]="${DEFAULT_RECIPIENT_DYNATRACE}"
+role_recipients_opsgenie[sitemgr]="${DEFAULT_RECIPIENT_OPSGENIE}"
+
role_recipients_matrix[sitemgr]="${DEFAULT_RECIPIENT_MATRIX}"
role_recipients_stackpulse[sitemgr]="${DEFAULT_RECIPIENT_STACKPULSE}"
diff --git a/health/notifications/opsgenie/Makefile.inc b/health/notifications/opsgenie/Makefile.inc
new file mode 100644
index 0000000000..c85bb7c320
--- /dev/null
+++ b/health/notifications/opsgenie/Makefile.inc
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# THIS IS NOT A COMPLETE Makefile
+# IT IS INCLUDED BY ITS PARENT'S Makefile.am
+# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
+
+# install these files
+dist_noinst_DATA += \
+ opsgenie/README.md \
+ opsgenie/Makefile.inc \
+ $(NULL)
+
diff --git a/health/notifications/opsgenie/README.md b/health/notifications/opsgenie/README.md
new file mode 100644
index 0000000000..aeb3154896
--- /dev/null
+++ b/health/notifications/opsgenie/README.md
@@ -0,0 +1,59 @@
+<!--
+title: "Send notifications to Opsgenie"
+description: "Send alerts to your Opsgenie incident response account any time an anomaly or performance issue strikes a node in your infrastructure."
+sidebar_label: "Opsgenie"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/health/notifications/opsgenie/README.md
+-->
+
+# Send notifications to Opsgenie
+
+[Opsgenie](https://www.atlassian.com/software/opsgenie) is an alerting and incident response tool. It is designed to
+group and filter alarms, build custom routing rules for on-call teams, and correlate deployments and commits to
+incidents.
+
+The first step is to create a [Netdata integration](https://docs.opsgenie.com/docs/api-integration) in the
+[Opsgenie](https://www.atlassian.com/software/opsgenie) dashboard. After this, you need to edit
+`health_alarm_notify.conf` on your system, by running the following from your [config
+directory](/docs/configure/nodes.md):
+
+```bash
+./edit-config health_alarm_notify.conf
+```
+
+Set the variable `OPSGENIE_API_KEY` to the API key you got from Opsgenie.
+
+```
+SEND_OPSGENIE="YES"
+
+# API key
+# Default Opsgenie API
+OPSGENIE_API_KEY="11111111-2222-3333-4444-555555555555"
+```
+
+Changes to `health_alarm_notify.conf` do not require a Netdata restart. You can test your Opsgenie notification
+configuration by running the following commands, replacing `ROLE` with your preferred role:
+
+```sh
+# become user netdata
+sudo su -s /bin/bash netdata
+
+# send a test alarm
+/usr/libexec/netdata/plugins.d/alarm-notify.sh test ROLE
+```
+
+If everything works, you'll see alarms in your Opsgenie platform:
+
+![Example alarm notifications in
+Opsgenie](https://user-images.githubusercontent.com/49162938/92184518-f725f900-ee40-11ea-9afa-e7c639c72206.png)
+
+If sending the test notifications fails, you can look in `/var/log/netdata/error.log` to find the relevant error
+message:
+
+```log
+2020-09-03 23:07:00: alarm-notify.sh: ERROR: failed to send opsgenie notification for: hades test.chart.test_alarm is CRITICAL, with HTTP error code 401.
+```
+
+You can find more details about the Opsgenie error codes in their [response
+docs](https://docs.opsgenie.com/docs/response).
+
+[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fhealth%2Fnotifications%2Fopsgenie%2FREADME%2FDonations-netdata-has-received&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>)