summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEmmanuel Vasilakis <mrzammler@mm.st>2022-08-01 10:58:49 +0300
committerGitHub <noreply@github.com>2022-08-01 10:58:49 +0300
commit7b6d6ea9bdef2ba496cc6c52c224b17411baf784 (patch)
treece5ba7acdd7458844d9fde5a3092f07e774b23b2
parent093da64d34ffb4d316ab6d22ec8b57a95e7e3ef8 (diff)
Show last 15 alerts in notification (#13434)
* show last 15 alerts in notification * change to alerts * fix shellcheck
-rw-r--r--health/health.c58
-rwxr-xr-xhealth/notifications/alarm-notify.sh.in8
2 files changed, 55 insertions, 11 deletions
diff --git a/health/health.c b/health/health.c
index 26669decd3..f4b0665eb7 100644
--- a/health/health.c
+++ b/health/health.c
@@ -11,6 +11,12 @@ static struct {
ALARM_ENTRY *tail; // latest
} alarm_notifications_in_progress = {NULL, NULL};
+typedef struct active_alerts {
+ char *name;
+ time_t last_status_change;
+ RRDCALC_STATUS status;
+} active_alerts_t;
+
static inline void enqueue_alarm_notify_in_progress(ALARM_ENTRY *ae)
{
ae->prev_in_progress = NULL;
@@ -245,6 +251,15 @@ static inline RRDCALC_STATUS rrdcalc_value2status(NETDATA_DOUBLE n) {
}
#define ALARM_EXEC_COMMAND_LENGTH 8192
+#define ACTIVE_ALARMS_LIST_EXAMINE 500
+#define ACTIVE_ALARMS_LIST 15
+
+static inline int compare_active_alerts(const void * a, const void * b) {
+ active_alerts_t *active_alerts_a = (active_alerts_t *)a;
+ active_alerts_t *active_alerts_b = (active_alerts_t *)b;
+
+ return ( active_alerts_b->last_status_change - active_alerts_a->last_status_change );
+}
static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
ae->flags |= HEALTH_ENTRY_FLAG_PROCESSED;
@@ -310,31 +325,28 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
RRDCALC *rc;
EVAL_EXPRESSION *expr=NULL;
BUFFER *warn_alarms, *crit_alarms;
+ active_alerts_t *active_alerts = callocz(ACTIVE_ALARMS_LIST_EXAMINE, sizeof(active_alerts_t));
warn_alarms = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
crit_alarms = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
- for(rc = host->alarms; rc ; rc = rc->next) {
+ for(rc = host->alarms; rc && (n_warn + n_crit) < ACTIVE_ALARMS_LIST_EXAMINE ; rc = rc->next) {
if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
continue;
if (unlikely(rc->status == RRDCALC_STATUS_WARNING)) {
if (likely(ae->alarm_id != rc->id) || likely(ae->alarm_event_id != rc->next_event_id - 1)) {
- if (n_warn)
- buffer_strcat(warn_alarms, ",");
- buffer_strcat(warn_alarms, rc->name);
- buffer_strcat(warn_alarms, "=");
- buffer_snprintf(warn_alarms, 11, "%"PRId64"", (int64_t)rc->last_status_change);
+ active_alerts[n_warn+n_crit].name = rc->name;
+ active_alerts[n_warn+n_crit].last_status_change = rc->last_status_change;
+ active_alerts[n_warn+n_crit].status = rc->status;
n_warn++;
} else if (ae->alarm_id == rc->id)
expr = rc->warning;
} else if (unlikely(rc->status == RRDCALC_STATUS_CRITICAL)) {
if (likely(ae->alarm_id != rc->id) || likely(ae->alarm_event_id != rc->next_event_id - 1)) {
- if (n_crit)
- buffer_strcat(crit_alarms, ",");
- buffer_strcat(crit_alarms, rc->name);
- buffer_strcat(crit_alarms, "=");
- buffer_snprintf(crit_alarms, 11, "%"PRId64"", (int64_t)rc->last_status_change);
+ active_alerts[n_warn+n_crit].name = rc->name;
+ active_alerts[n_warn+n_crit].last_status_change = rc->last_status_change;
+ active_alerts[n_warn+n_crit].status = rc->status;
n_crit++;
} else if (ae->alarm_id == rc->id)
expr = rc->critical;
@@ -344,6 +356,29 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
}
}
+ if (n_warn+n_crit>1)
+ qsort (active_alerts, n_warn+n_crit, sizeof(active_alerts_t), compare_active_alerts);
+
+ int count_w = 0, count_c = 0;
+ while (count_w + count_c < n_warn + n_crit && count_w + count_c < ACTIVE_ALARMS_LIST) {
+ if (active_alerts[count_w+count_c].status == RRDCALC_STATUS_WARNING) {
+ if (count_w)
+ buffer_strcat(warn_alarms, ",");
+ buffer_strcat(warn_alarms, active_alerts[count_w+count_c].name);
+ buffer_strcat(warn_alarms, "=");
+ buffer_snprintf(warn_alarms, 11, "%"PRId64"", (int64_t)active_alerts[count_w+count_c].last_status_change);
+ count_w++;
+ }
+ else if (active_alerts[count_w+count_c].status == RRDCALC_STATUS_CRITICAL) {
+ if (count_c)
+ buffer_strcat(crit_alarms, ",");
+ buffer_strcat(crit_alarms, active_alerts[count_w+count_c].name);
+ buffer_strcat(crit_alarms, "=");
+ buffer_snprintf(crit_alarms, 11, "%"PRId64"", (int64_t)active_alerts[count_w+count_c].last_status_change);
+ count_c++;
+ }
+ }
+
char *edit_command = ae->source ? health_edit_command_from_source(ae->source) : strdupz("UNKNOWN=0=UNKNOWN");
snprintfz(command_to_run, ALARM_EXEC_COMMAND_LENGTH, "exec %s '%s' '%s' '%u' '%u' '%u' '%lu' '%s' '%s' '%s' '%s' '%s' '" NETDATA_DOUBLE_FORMAT_ZERO
@@ -392,6 +427,7 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
freez(edit_command);
buffer_free(warn_alarms);
buffer_free(crit_alarms);
+ freez(active_alerts);
return; //health_alarm_wait_for_execution
done:
diff --git a/health/notifications/alarm-notify.sh.in b/health/notifications/alarm-notify.sh.in
index 38a69a0f3c..0dfecade5b 100755
--- a/health/notifications/alarm-notify.sh.in
+++ b/health/notifications/alarm-notify.sh.in
@@ -2898,6 +2898,10 @@ if [ -n "$total_crit_alarms" ]; then
done <<<"$total_crit_alarms,"
fi
+if (( total_warnings + total_critical > 15 )); then
+ EXTRA_ALARMS_LIST_TEXT="(Showing latest 15 alerts)"
+fi
+
if [ -n "$edit_command_line" ]; then
IFS='=' read -r edit_command line s_host <<<"$edit_command_line"
fi
@@ -3423,6 +3427,10 @@ Content-Transfer-Encoding: 8bit
<span style="font-weight:600">${total_critical} critical</span>
additional active alert(s)</div>
</td>
+ </tr>
+ <td align="left" style="font-size:0px;padding:10px 25px;word-break:break-word;">
+ <div style="font-family:Open Sans, sans-serif;font-size:12px;line-height:1;text-align:center;color:#35414A;">${EXTRA_ALARMS_LIST_TEXT}</div>
+ </td>
</tr>
</tbody>
</table>