diff options
author | thiagoftsm <thiagoftsm@gmail.com> | 2020-10-28 13:48:53 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-10-28 13:48:53 +0000 |
commit | 1a50899551674e0f0fa684daeadd043503be949b (patch) | |
tree | 8032cbeca41f5a23ae6f7f8197e09fcb5b015ed6 /health | |
parent | 93f94c003ef6d494e6beb05a1e6523f2a2cf1a00 (diff) |
Opsgenie integration (#9879)
Bring full integration with Opsgenie.
Diffstat (limited to 'health')
-rw-r--r-- | health/notifications/Makefile.am | 1 | ||||
-rwxr-xr-x | health/notifications/alarm-notify.sh.in | 81 | ||||
-rwxr-xr-x | health/notifications/health_alarm_notify.conf | 21 | ||||
-rw-r--r-- | health/notifications/opsgenie/Makefile.inc | 12 | ||||
-rw-r--r-- | health/notifications/opsgenie/README.md | 59 |
5 files changed, 168 insertions, 6 deletions
diff --git a/health/notifications/Makefile.am b/health/notifications/Makefile.am index 4af9f7782e..e6b42138e0 100644 --- a/health/notifications/Makefile.am +++ b/health/notifications/Makefile.am @@ -35,6 +35,7 @@ include hangouts/Makefile.inc include irc/Makefile.inc include kavenegar/Makefile.inc include messagebird/Makefile.inc +include opsgenie/Makefile.inc include pagerduty/Makefile.inc include pushbullet/Makefile.inc include pushover/Makefile.inc diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in index 78641f07d0..097555f42c 100755 --- a/health/notifications/alarm-notify.sh.in +++ b/health/notifications/alarm-notify.sh.in @@ -37,6 +37,7 @@ # - Google Hangouts Chat notifications by @EnzoAkira and @hendrikhofstadt # - Dynatrace Event by @illumine # - Stackpulse Event by @thiagoftsm +# - Opsgenie by @thiagoftsm #9858 # ----------------------------------------------------------------------------- # testing notifications @@ -386,6 +387,9 @@ SEND_DYNATRACE= # stackpulse configs STACKPULSE_WEBHOOK= +# opsgenie configs +OPSGENIE_API_KEY= + # load the stock and user configuration files # these will overwrite the variables above @@ -532,6 +536,9 @@ filter_recipient_by_criticality() { [ -z "${DYNATRACE_TAG_VALUE}" ] || [ -z "${DYNATRACE_EVENT}" ]; } && SEND_DYNATRACE="NO" +# check opsgenie +[ -z "${OPSGENIE_API_KEY}" ] && SEND_OPSGENIE="NO" + # check matrix { [ -z "${MATRIX_HOMESERVER}" ] || [ -z "${MATRIX_ACCESSTOKEN}" ]; } && SEND_MATRIX="NO" @@ -559,7 +566,8 @@ if [ "${SEND_PUSHOVER}" = "YES" ] || [ "${SEND_CUSTOM}" = "YES" ] || [ "${SEND_MSTEAM}" = "YES" ] || [ "${SEND_DYNATRACE}" = "YES" ] || - [ "${SEND_STACKPULSE}" = "YES" ]; then + [ "${SEND_STACKPULSE}" = "YES" ] || + [ "${SEND_OPSGENIE}" = "YES" ]; then # if we need curl, check for the curl command if [ -z "${curl}" ]; then curl="$(command -v curl 2>/dev/null)" @@ -588,6 +596,7 @@ if [ "${SEND_PUSHOVER}" = "YES" ] || SEND_CUSTOM="NO" SEND_DYNATRACE="NO" 
SEND_STACKPULSE="NO" + SEND_OPSGENIE="NO" fi fi @@ -717,8 +726,9 @@ for method in "${SEND_EMAIL}" \ "${SEND_SMS}" \ "${SEND_MSTEAM}" \ "${SEND_DYNATRACE}" \ - "${SEND_STACKPULSE}" ; do - + "${SEND_STACKPULSE}" \ + "${SEND_OPSGENIE}" ; do + if [ "${method}" == "YES" ]; then proceed=1 break @@ -2073,6 +2083,60 @@ EOF return 0 } # ----------------------------------------------------------------------------- +# Opsgenie sender + +send_opsgenie() { + local payload httpcode oldv currv + [ "${SEND_OPSGENIE}" != "YES" ] && return 1 + + if [ -z "${OPSGENIE_API_KEY}" ] ; then + info "Can't send Opsgenie notification, because OPSGENIE_API_KEY is not defined" + return 1 + fi + + # We are sending null when values are nan to avoid errors while JSON message is parsed + [ "${old_value}" != "nan" ] && oldv="${old_value}" || oldv="null" + [ "${value}" != "nan" ] && currv="${value}" || currv="null" + + payload=$(cat <<EOF + { + "host" : "${host}", + "unique_id" : "${unique_id}", + "alarmId" : ${alarm_id}, + "eventId" : ${event_id}, + "chart" : "${chart}", + "when": ${when}, + "name" : "${name}", + "family" : "${family}", + "status" : "${status}", + "old_status" : "${old_status}", + "value" : ${currv}, + "old_value" : ${oldv}, + "duration": ${duration}, + "non_clear_duration": ${non_clear_duration}, + "units" : "${units}", + "info" : "${status_message}, ${info}", + "calc_expression" : "${calc_expression}", + "total_warnings" : "${total_warnings}", + "total_critical" : "${total_critical}", + "src" : "${src}" + } +EOF +) + + httpcode=$(docurl -X POST -H "Content-Type: application/json" -d "${payload}" "https://api.opsgenie.com/v1/json/integrations/webhooks/netdata?apiKey=${OPSGENIE_API_KEY}") + # https://docs.opsgenie.com/docs/alert-api#create-alert + if [ "${httpcode}" = "200" ]; then + info "sent opsgenie notification for: ${host} ${chart}.${name} is ${status}" + else + error "failed to send opsgenie notification for: ${host} ${chart}.${name} is ${status}, with HTTP error code 
${httpcode}." + return 1 + fi + + return 0 +} + +# ----------------------------------------------------------------------------- # prepare the content of the notification # the url to send the user on click @@ -2589,13 +2653,17 @@ SENT_EMAIL=$? send_dynatrace "${host}" "${chart}" "${name}" "${status}" SENT_DYNATRACE=$? - # ----------------------------------------------------------------------------- -# send the EVENT to Dynatrace +# send the EVENT to Stackpulse send_stackpulse SENT_STACKPULSE=$? # ----------------------------------------------------------------------------- +# send messages to Opsgenie +send_opsgenie +SENT_OPSGENIE=$? + +# ----------------------------------------------------------------------------- # let netdata know for state in "${SENT_EMAIL}" \ "${SENT_PUSHOVER}" \ @@ -2623,7 +2691,8 @@ for state in "${SENT_EMAIL}" \ "${SENT_SMS}" \ "${SENT_MSTEAM}" \ "${SENT_DYNATRACE}" \ - "${SENT_STACKPULSE}" ; do + "${SENT_STACKPULSE}" \ + "${SENT_OPSGENIE}"; do if [ "${state}" -eq 0 ]; then # we sent something exit 0 diff --git a/health/notifications/health_alarm_notify.conf b/health/notifications/health_alarm_notify.conf index 5615683452..4ebd9f2542 100755 --- a/health/notifications/health_alarm_notify.conf +++ b/health/notifications/health_alarm_notify.conf @@ -279,6 +279,15 @@ STACKPULSE_WEBHOOK="" DEFAULT_RECIPIENT_STACKPULSE="" #------------------------------------------------------------------------------ +# opsgenie global notification options +SEND_OPSGENIE="YES" + +# Api key +OPSGENIE_API_KEY="" + +DEFAULT_RECIPIENT_OPSGENIE="" + +#------------------------------------------------------------------------------ # hangouts (google hangouts chat) global notification options # enable/disable sending hangouts notifications @@ -944,6 +953,8 @@ role_recipients_rocketchat[sysadmin]="${DEFAULT_RECIPIENT_ROCKETCHAT}" role_recipients_dynatrace[sysadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[sysadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" + 
role_recipients_matrix[sysadmin]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[sysadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" @@ -999,6 +1010,8 @@ role_recipients_sms[domainadmin]="${DEFAULT_RECIPIENT_SMS}" role_recipients_dynatrace[domainadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[domainadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" + role_recipients_matrix[domainadmin]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[domainadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" @@ -1055,6 +1068,8 @@ role_recipients_sms[dba]="${DEFAULT_RECIPIENT_SMS}" role_recipients_dynatrace[dba]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[dba]="${DEFAULT_RECIPIENT_OPSGENIE}" + role_recipients_matrix[dba]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[dba]="${DEFAULT_RECIPIENT_STACKPULSE}" @@ -1111,6 +1126,8 @@ role_recipients_sms[webmaster]="${DEFAULT_RECIPIENT_SMS}" role_recipients_dynatrace[webmaster]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[webmaster]="${DEFAULT_RECIPIENT_OPSGENIE}" + role_recipients_matrix[webmaster]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[webmaster]="${DEFAULT_RECIPIENT_STACKPULSE}" @@ -1167,6 +1184,8 @@ role_recipients_sms[proxyadmin]="${DEFAULT_RECIPIENT_SMS}" role_recipients_dynatrace[proxyadmin]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[proxyadmin]="${DEFAULT_RECIPIENT_OPSGENIE}" + role_recipients_matrix[proxyadmin]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[proxyadmin]="${DEFAULT_RECIPIENT_STACKPULSE}" @@ -1221,6 +1240,8 @@ role_recipients_sms[sitemgr]="${DEFAULT_RECIPIENT_SMS}" role_recipients_dynatrace[sitemgr]="${DEFAULT_RECIPIENT_DYNATRACE}" +role_recipients_opsgenie[sitemgr]="${DEFAULT_RECIPIENT_OPSGENIE}" + role_recipients_matrix[sitemgr]="${DEFAULT_RECIPIENT_MATRIX}" role_recipients_stackpulse[sitemgr]="${DEFAULT_RECIPIENT_STACKPULSE}" diff --git a/health/notifications/opsgenie/Makefile.inc b/health/notifications/opsgenie/Makefile.inc new 
file mode 100644 index 0000000000..c85bb7c320 --- /dev/null +++ b/health/notifications/opsgenie/Makefile.inc @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# THIS IS NOT A COMPLETE Makefile +# IT IS INCLUDED BY ITS PARENT'S Makefile.am +# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT + +# install these files +dist_noinst_DATA += \ + opsgenie/README.md \ + opsgenie/Makefile.inc \ + $(NULL) + diff --git a/health/notifications/opsgenie/README.md b/health/notifications/opsgenie/README.md new file mode 100644 index 0000000000..aeb3154896 --- /dev/null +++ b/health/notifications/opsgenie/README.md @@ -0,0 +1,59 @@ +<!-- +title: "Send notifications to Opsgenie" +description: "Send alerts to your Opsgenie incident response account any time an anomaly or performance issue strikes a node in your infrastructure." +sidebar_label: "Opsgenie" +custom_edit_url: https://github.com/netdata/netdata/edit/master/health/notifications/opsgenie/README.md +--> + +# Send notifications to Opsgenie + +[Opsgenie](https://www.atlassian.com/software/opsgenie) is an alerting and incident response tool. It is designed to +group and filter alarms, build custom routing rules for on-call teams, and correlate deployments and commits to +incidents. + +The first step is to create a [Netdata integration](https://docs.opsgenie.com/docs/api-integration) in the +[Opsgenie](https://www.atlassian.com/software/opsgenie) dashboard. After this, you need to edit +`health_alarm_notify.conf` on your system, by running the following from your [config +directory](/docs/configure/nodes.md): + +```bash +./edit-config health_alarm_notify.conf +``` + +Set the variable `OPSGENIE_API_KEY` to the API key you got from Opsgenie. + +``` +SEND_OPSGENIE="YES" + +# Api key +# Default Opsgenie API +OPSGENIE_API_KEY="11111111-2222-3333-4444-555555555555" +``` + +Changes to `health_alarm_notify.conf` do not require a Netdata restart. 
You can test your Opsgenie notifications +configuration by issuing the following commands, replacing `ROLE` with your preferred role: + +```sh +# become user netdata +sudo su -s /bin/bash netdata + +# send a test alarm +/usr/libexec/netdata/plugins.d/alarm-notify.sh test ROLE +``` + +If everything works, you'll see alarms in your Opsgenie platform: + +![Example alarm notifications in +Opsgenie](https://user-images.githubusercontent.com/49162938/92184518-f725f900-ee40-11ea-9afa-e7c639c72206.png) + +If sending the test notifications fails, you can look in `/var/log/netdata/error.log` to find the relevant error +message: + +```log +2020-09-03 23:07:00: alarm-notify.sh: ERROR: failed to send opsgenie notification for: hades test.chart.test_alarm is CRITICAL, with HTTP error code 401. +``` + +You can find more details about the Opsgenie error codes in their [response +docs](https://docs.opsgenie.com/docs/response). + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fhealth%2Fnotifications%2Fopsgenie%2FREADME%2FDonations-netdata-has-received&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)](<>) |