// SPDX-License-Identifier: GPL-3.0-or-later #define NETDATA_HEALTH_INTERNALS #include "rrd.h" // ---------------------------------------------------------------------------- // RRDCALC management inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { switch(status) { case RRDCALC_STATUS_REMOVED: return "REMOVED"; case RRDCALC_STATUS_UNDEFINED: return "UNDEFINED"; case RRDCALC_STATUS_UNINITIALIZED: return "UNINITIALIZED"; case RRDCALC_STATUS_CLEAR: return "CLEAR"; case RRDCALC_STATUS_RAISED: return "RAISED"; case RRDCALC_STATUS_WARNING: return "WARNING"; case RRDCALC_STATUS_CRITICAL: return "CRITICAL"; default: error("Unknown alarm status %d", status); return "UNKNOWN"; } } static STRING *rrdcalc_replace_variables(const char *line, RRDCALC *rc) { if (!line || !*line) return NULL; size_t pos = 0; char *temp = strdupz(line); char var[RRDCALC_VAR_MAX]; char *m, *lbl_value = NULL; while ((m = strchr(temp + pos, '$'))) { int i = 0; char *e = m; while (*e) { if (*e == ' ' || i == RRDCALC_VAR_MAX - 1) break; else var[i] = *e; e++; i++; } var[i] = '\0'; pos = m - temp + 1; if (!strcmp(var, RRDCALC_VAR_FAMILY)) { char *buf = find_and_replace(temp, var, (rc->rrdset && rc->rrdset->family) ? rrdset_family(rc->rrdset) : "", m); freez(temp); temp = buf; } else if (!strncmp(var, RRDCALC_VAR_LABEL, RRDCALC_VAR_LABEL_LEN)) { if(likely(rc->rrdset && rc->rrdset->rrdlabels)) { rrdlabels_get_value_to_char_or_null(rc->rrdset->rrdlabels, &lbl_value, var+RRDCALC_VAR_LABEL_LEN); if (lbl_value) { char *buf = find_and_replace(temp, var, lbl_value, m); freez(temp); temp = buf; freez(lbl_value); } } } } STRING *ret = string_strdupz(temp); freez(temp); return ret; } void rrdcalc_update_rrdlabels(RRDSET *st) { RRDCALC *rc; foreach_rrdcalc_in_rrdset(st, rc) { if (rc->original_info) { if (rc->info) string_freez(rc->info); rc->info = rrdcalc_replace_variables(rrdcalc_original_info(rc), rc); } } } static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { RRDHOST *host = st->rrdhost; debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); rc->last_status_change = now_realtime_sec(); rc->rrdset = st; DOUBLE_LINKED_LIST_PREPEND_UNSAFE(st->alarms, rc, rrdset_prev, rrdset_next); if(rc->update_every < rc->rrdset->update_every) { error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); rc->update_every = rc->rrdset->update_every; } if(!isnan(rc->green) && isnan(st->green)) { debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->green, rc->green); st->green = rc->green; } if(!isnan(rc->red) && isnan(st->red)) { debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " NETDATA_DOUBLE_FORMAT_AUTO " to " NETDATA_DOUBLE_FORMAT_AUTO ".", rrdset_id(rc->rrdset), rrdcalc_name(rc), rc->rrdset->red, rc->red); st->red = rc->red; } rc->local = rrdvar_create_and_index("local", st->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_LOCAL_VAR, &rc->value); rc->family = rrdvar_create_and_index("family", st->rrdfamily->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_FAMILY_VAR, &rc->value); char fullname[RRDVAR_MAX_LENGTH + 1]; snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_id(st), rrdcalc_name(rc)); STRING *fullname_string = string_strdupz(fullname); rc->hostid = rrdvar_create_and_index("host", host->rrdvar_root_index, fullname_string, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR, &rc->value); snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", rrdset_name(st), rrdcalc_name(rc)); rc->hostname = rrdvar_create_and_index("host", host->rrdvar_root_index, fullname_string, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR, &rc->value); string_freez(fullname_string); if(rc->hostid && !rc->hostname) rc->hostid->options |= RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR; if(!rc->units) rc->units = string_dup(st->units); if (rc->original_info) { if (rc->info) string_freez(rc->info); rc->info = rrdcalc_replace_variables(rrdcalc_original_info(rc), rc); } time_t now = now_realtime_sec(); ALARM_ENTRY *ae = health_create_alarm_entry( host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id, rc->rrdset->context, rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0, rrdcalc_isrepeating(rc)?HEALTH_ENTRY_FLAG_IS_REPEATING:0); health_alarm_log(host, ae); } static inline int rrdcalc_is_matching_rrdset(RRDCALC *rc, RRDSET *st) { if ( (rc->chart != st->id) && (rc->chart != st->name)) return 0; if (rc->module_pattern && !simple_pattern_matches(rc->module_pattern, rrdset_module_name(st))) return 0; if (rc->plugin_pattern && !simple_pattern_matches(rc->plugin_pattern, rrdset_plugin_name(st))) return 0; if (st->rrdhost->rrdlabels && rc->host_labels_pattern && !rrdlabels_match_simple_pattern_parsed(st->rrdhost->rrdlabels, rc->host_labels_pattern, '=')) return 0; return 1; } // this has to be called while the RRDHOST is locked inline void rrdsetcalc_link_matching(RRDSET *st) { RRDHOST *host = st->rrdhost; // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); RRDCALC *rc; foreach_rrdcalc_in_rrdhost(host, rc) { if(unlikely(rc->rrdset)) continue; if(unlikely(rrdcalc_is_matching_rrdset(rc, st))) rrdsetcalc_link(st, rc); } } // this has to be called while the RRDHOST is locked inline void rrdsetcalc_unlink(RRDCALC *rc) { RRDSET *st = rc->rrdset; if(!st) { debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rrdcalc_chart_name(rc), rrdcalc_name(rc)); return; } RRDHOST *host = st->rrdhost; time_t now = now_realtime_sec(); ALARM_ENTRY *ae = health_create_alarm_entry( host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id, rc->rrdset->context, rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0, 0); health_alarm_log(host, ae); debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdset_id(st), rrdhost_hostname(host)); // unlink it DOUBLE_LINKED_LIST_REMOVE_UNSAFE(st->alarms, rc, rrdset_prev, rrdset_next); rrdvar_free(host, st->rrdvar_root_index, rc->local); rc->local = NULL; rrdvar_free(host, st->rrdfamily->rrdvar_root_index, rc->family); rc->family = NULL; rrdvar_free(host, host->rrdvar_root_index, rc->hostid); rc->hostid = NULL; rrdvar_free(host, host->rrdvar_root_index, rc->hostname); rc->hostname = NULL; rc->rrdset = NULL; // RRDCALC will remain in RRDHOST // so that if the matching chart is found in the future // it will be applied automatically } RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { RRDCALC *rc = NULL; STRING *name_string = string_strdupz(name); foreach_rrdcalc_in_rrdset(st, rc) { if(unlikely(rc->name == name_string)) break; } string_freez(name_string); return rc; } inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name) { RRDCALC *rc; if(unlikely(!chart)) { error("attempt to find RRDCALC '%s' without giving a chart name", name); return 1; } STRING *name_string = string_strdupz(name); STRING *chart_string = string_strdupz(chart); int ret = 0; // make sure it does not already exist foreach_rrdcalc_in_rrdhost(host, rc) { if (unlikely(rc->chart == chart_string && rc->name == name_string)) { debug(D_HEALTH, "Health alarm '%s/%s' already exists in host '%s'.", chart, name, rrdhost_hostname(host)); info("Health alarm '%s/%s' already exists in host '%s'.", chart, name, rrdhost_hostname(host)); ret = 1; break; } } string_freez(name_string); string_freez(chart_string); return ret; } inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) { // re-use old IDs, by looking them up in the alarm log ALARM_ENTRY *ae = NULL; for(ae = host->health_log.alarms; ae ;ae = ae->next) { if(unlikely(name == ae->name && chart == ae->chart)) { if(next_event_id) *next_event_id = ae->alarm_event_id + 1; break; } } if(ae) return ae->alarm_id; if (unlikely(!host->health_log.next_alarm_id)) host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); return host->health_log.next_alarm_id++; } /** * Alarm name with dimension * * Change the name of the current alarm appending a new diagram. * * @param name the alarm name * @param namelen is the length of the previous vector. * @param dim the dimension of the chart. * @param dimlen is the length of the previous vector. * * @return It returns the new name on success and the old otherwise */ char *alarm_name_with_dim(const char *name, size_t namelen, const char *dim, size_t dimlen) { char *newname,*move; newname = mallocz(namelen + dimlen + 2); move = newname; memcpy(move, name, namelen); move += namelen; *move++ = '_'; memcpy(move, dim, dimlen); move += dimlen; *move = '\0'; return newname; } /** * Remove pipe comma * * Remove the pipes and commas converting to space. * * @param str the string to change. */ void dimension_remove_pipe_comma(char *str) { while(*str) { if(*str == '|' || *str == ',') *str = ' '; str++; } } inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { rrdhost_check_rdlock(host); if(rc->calculation) { rc->calculation->status = &rc->status; rc->calculation->myself = &rc->value; rc->calculation->after = &rc->db_after; rc->calculation->before = &rc->db_before; rc->calculation->rrdcalc = rc; } if(rc->warning) { rc->warning->status = &rc->status; rc->warning->myself = &rc->value; rc->warning->after = &rc->db_after; rc->warning->before = &rc->db_before; rc->warning->rrdcalc = rc; } if(rc->critical) { rc->critical->status = &rc->status; rc->critical->myself = &rc->value; rc->critical->after = &rc->db_after; rc->critical->before = &rc->db_before; rc->critical->rrdcalc = rc; } if(!rc->foreachdim) { // link it to the host alarms list DOUBLE_LINKED_LIST_APPEND_UNSAFE(host->host_alarms, rc, prev, next); // link it to its chart RRDSET *st; rrdset_foreach_read(st, host) { if(rrdcalc_is_matching_rrdset(rc, st)) { rrdsetcalc_link(st, rc); break; } } } else { DOUBLE_LINKED_LIST_APPEND_UNSAFE(host->alarms_with_foreach, rc, prev, next); // We are not linking this alarm directly to the host here // It will eventually be done if it matches the dimensions } } inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rrdcalctemplate_name(rt)); if(rrdcalc_exists(host, chart, rrdcalctemplate_name(rt))) return NULL; RRDCALC *rc = callocz(1, sizeof(RRDCALC)); rc->next_event_id = 1; rc->name = string_dup(rt->name); rc->chart = string_strdupz(chart); uuid_copy(rc->config_hash_id, rt->config_hash_id); rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); rc->dimensions = string_dup(rt->dimensions); rc->foreachdim = string_dup(rt->foreachdim); if(rt->foreachdim) rc->spdim = health_pattern_from_foreach(rrdcalc_foreachdim(rc)); rc->foreachcounter = rt->foreachcounter; rc->green = rt->green; rc->red = rt->red; rc->value = NAN; rc->old_value = NAN; rc->delay_up_duration = rt->delay_up_duration; rc->delay_down_duration = rt->delay_down_duration; rc->delay_max_duration = rt->delay_max_duration; rc->delay_multiplier = rt->delay_multiplier; rc->last_repeat = 0; rc->times_repeat = 0; rc->warn_repeat_every = rt->warn_repeat_every; rc->crit_repeat_every = rt->crit_repeat_every; rc->group = rt->group; rc->after = rt->after; rc->before = rt->before; rc->update_every = rt->update_every; rc->options = rt->options; rc->exec = string_dup(rt->exec); rc->recipient = string_dup(rt->recipient); rc->source = string_dup(rt->source); rc->units = string_dup(rt->units); rc->info = string_dup(rt->info); rc->original_info = string_dup(rt->info); rc->classification = string_dup(rt->classification); rc->component = string_dup(rt->component); rc->type = string_dup(rt->type); if(rt->calculation) { rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); if(!rc->calculation) error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rrdcalctemplate_name(rt), rt->calculation->source); } if(rt->warning) { rc->warning = expression_parse(rt->warning->source, NULL, NULL); if(!rc->warning) error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rrdcalctemplate_name(rt), rt->warning->source); } if(rt->critical) { rc->critical = expression_parse(rt->critical->source, NULL, NULL); if(!rc->critical) error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rrdcalctemplate_name(rt), rt->critical->source); } debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " NETDATA_DOUBLE_FORMAT_AUTO ", red " NETDATA_DOUBLE_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", rrdcalc_chart_name(rc), rrdcalc_name(rc), (rc->exec)?rrdcalc_exec(rc):"DEFAULT", (rc->recipient)?rrdcalc_recipient(rc):"DEFAULT", rc->green, rc->red, (int)rc->group, rc->after, rc->before, rc->options, (rc->dimensions)?rrdcalc_dimensions(rc):"NONE", (rc->foreachdim)?rrdcalc_foreachdim(rc):"NONE", rc->update_every, (rc->calculation)?rc->calculation->parsed_as:"NONE", (rc->warning)?rc->warning->parsed_as:"NONE", (rc->critical)?rc->critical->parsed_as:"NONE", rrdcalc_source(rc), rc->delay_up_duration, rc->delay_down_duration, rc->delay_max_duration, rc->delay_multiplier, rc->warn_repeat_every, rc->crit_repeat_every ); rrdcalc_add_to_host(host, rc); return rc; } /** * Create from RRDCALC * * Create a new alarm using another alarm as template. * * @param rc is the alarm that will be used as source * @param host is the host structure. * @param name is the newest chart name. * @param dimension is the current dimension * @param foreachdim the whole list of dimension * * @return it returns the new alarm changed. */ inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension) { RRDCALC *newrc = callocz(1, sizeof(RRDCALC)); newrc->next_event_id = 1; newrc->name = string_strdupz(name); newrc->chart = string_dup(rc->chart); newrc->id = rrdcalc_get_unique_id(host, newrc->chart, newrc->name, &rc->next_event_id); uuid_copy(newrc->config_hash_id, *((uuid_t *) &rc->config_hash_id)); newrc->dimensions = string_strdupz(dimension); newrc->foreachdim = NULL; rc->foreachcounter++; newrc->foreachcounter = rc->foreachcounter; newrc->green = rc->green; newrc->red = rc->red; newrc->value = NAN; newrc->old_value = NAN; newrc->delay_up_duration = rc->delay_up_duration; newrc->delay_down_duration = rc->delay_down_duration; newrc->delay_max_duration = rc->delay_max_duration; newrc->delay_multiplier = rc->delay_multiplier; newrc->last_repeat = 0; newrc->times_repeat = 0; newrc->warn_repeat_every = rc->warn_repeat_every; newrc->crit_repeat_every = rc->crit_repeat_every; newrc->group = rc->group; newrc->after = rc->after; newrc->before = rc->before; newrc->update_every = rc->update_every; newrc->options = rc->options; newrc->exec = string_dup(rc->exec); newrc->recipient = string_dup(rc->recipient); newrc->source = string_dup(rc->source); newrc->units = string_dup(rc->units); newrc->info = string_dup(rc->info); newrc->original_info = string_dup(rc->original_info); newrc->classification = string_dup(rc->classification); newrc->component = string_dup(rc->component); newrc->type = string_dup(rc->type); if(rc->calculation) { newrc->calculation = expression_parse(rc->calculation->source, NULL, NULL); if(!newrc->calculation) error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rc->calculation->source); } if(rc->warning) { newrc->warning = expression_parse(rc->warning->source, NULL, NULL); if(!newrc->warning) error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rc->warning->source); } if(rc->critical) { newrc->critical = expression_parse(rc->critical->source, NULL, NULL); if(!newrc->critical) error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rc->critical->source); } return newrc; } void rrdcalc_free(RRDCALC *rc) { if(unlikely(!rc)) return; expression_free(rc->calculation); expression_free(rc->warning); expression_free(rc->critical); string_freez(rc->name); string_freez(rc->chart); string_freez(rc->dimensions); string_freez(rc->foreachdim); string_freez(rc->exec); string_freez(rc->recipient); string_freez(rc->source); string_freez(rc->units); string_freez(rc->info); string_freez(rc->original_info); string_freez(rc->classification); string_freez(rc->component); string_freez(rc->type); string_freez(rc->host_labels); string_freez(rc->module_match); string_freez(rc->plugin_match); simple_pattern_free(rc->spdim); simple_pattern_free(rc->host_labels_pattern); simple_pattern_free(rc->module_pattern); simple_pattern_free(rc->plugin_pattern); freez(rc->family); freez(rc); } void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { if(unlikely(!rc)) return; debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rrdcalc_chart_name(rc), rrdcalc_name(rc), rrdhost_hostname(host)); // unlink it from RRDSET if(rc->rrdset) rrdsetcalc_unlink(rc); // unlink it from RRDHOST DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->host_alarms, rc, prev, next); rrdcalc_free(rc); } void rrdcalc_foreach_unlink_and_free(RRDHOST *host, RRDCALC *rc) { DOUBLE_LINKED_LIST_REMOVE_UNSAFE(host->alarms_with_foreach, rc, prev, next); rrdcalc_free(rc); } static void rrdcalc_labels_unlink_alarm_loop(RRDHOST *host, RRDCALC *alarms) { for(RRDCALC *rc = alarms ; rc ; ) { RRDCALC *rc_next = rc->next; if (!rc->host_labels) { rc = rc_next; continue; } if(!rrdlabels_match_simple_pattern_parsed(host->rrdlabels, rc->host_labels_pattern, '=')) { info("Health configuration for alarm '%s' cannot be applied, because the host %s does not have the label(s) '%s'", rrdcalc_name(rc), rrdhost_hostname(host), rrdcalc_host_labels(rc)); if(host->host_alarms == alarms) rrdcalc_unlink_and_free(host, rc); else rrdcalc_foreach_unlink_and_free(host, rc); } rc = rc_next; } } void rrdcalc_labels_unlink_alarm_from_host(RRDHOST *host) { rrdcalc_labels_unlink_alarm_loop(host, host->host_alarms); rrdcalc_labels_unlink_alarm_loop(host, host->alarms_with_foreach); } void rrdcalc_labels_unlink() { rrd_rdlock(); RRDHOST *host; rrdhost_foreach_read(host) { if (unlikely(!host->health_enabled)) continue; if (host->rrdlabels) { rrdhost_wrlock(host); rrdcalc_labels_unlink_alarm_from_host(host); rrdhost_unlock(host); } } rrd_unlock(); }