summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorthiagoftsm <thiagoftsm@gmail.com>2019-09-27 12:24:54 +0000
committerChris Akritidis <43294513+cakrit@users.noreply.github.com>2019-09-27 14:24:54 +0200
commite3471fa5727bcf286dd3b52ec0cdecd8fdf7067e (patch)
treef2fa1e28143ecdae85536e8488965a7be6c7bfe2
parenta8b28bfbd2fe5a1814e6ddbb211961158f221fda (diff)
Create a template for all dimensions (#6560)
* health_connection: Comments inside Health Config To try to understand better what is necessary to change and where it is necessary to change anything inside the health, I commented the functions inside this file" " * health_connection: Comments about Health in other files This commit brings the rest of the comments that were missed for health" * health_connection: Comments on health_log I had to append more comments on health_log * health_connection: Create a new variable New variable is created to work with foreach * health_connection: Fix new option and doc The first implementation of the 'foreach' had a problem, this fixes the error. This commit also brings the updates for the documentation * health_connection: Understanding health This commit is to save the place that I am working, it has the map to understand all the alam process * health_connection: Update map I changed the position of the error message to identify the correct place to add new alarms * health_connection: End of simple alarm This commit finishes what is necessary to bring the same lookup for different dimensions in one unique line * health_connection: Documentation and template steps This commit brings the documentation missed for template and comments to help in the next step of apply a template to create an alarm. * health_connection: Restoring After some tests, it was detected that the alarms were not working as expected * health_connection: Fix bug and bring dimension to template This commit brings a fix for an old Netdata bug, before this the Netdata always tried to create a new entry in an index with the same id raising an error. 
It also brings the possibility to use 'foreach' in template * health_connection: Fix cmake compilation There was a problem with cmake compilation fixed by this commit * health_connection: shell script Finilize the shell script to test the PR * health_connection: Remove debug message During the development, I used some messages to understand the code this commit removes the last message * health_connection: Fix bugs This commits fix bugs reported by tests * health_connection: Alarm working This commit brings the necessary change for the alarms work, but it is missing the unlink from the newest list * health_connection: Template code written This commit finishes the creation of alarm from template, but it was not tested yet. * health_connection: Remove comments I am removing the comments from this PR to bring back late * health_connection: Remove lines Another commit to restore the files before they to be commented * health_connection: New alarm and remove messages I am bringing a new alarm to test template with SP and removing comments used during the development * health_connection: Functional test review After to review the functional test script, it was necessary to small adjust to test all the features available with the new version * health_connection: Free structure I am moving the free list for the correct place, the previous place was not safe * health_connection: ShellCheck This commit fixes the problems with shellcheck * health_connection: FIx hash This commit fix the hash calculation that was using wrong input * health_connection: Fix message error The system was showing a wronge message, because when we have foreach the alarm created with templated is added in a second stage to the index * health_connection: Fix documentation In this commit I am fixing the grammar of the previous doc and bringing two examples * health_connection: Fix examples This commit fix the last two examples that was brought in this PR * health_connection: Fix example doc When I 
brought the correct grammar in the last commit, I lost a mark * health_connection: Grammar fix Fixing grammar of the documentation * health_connection: Memory leak This commit fixes the memory leak that was present in the PR * health_connection: Reload This commit fix the problem that the alarms were not linked after to receive a SIGUSR2 * health_connection: False Positive from codacy Codacy was given a false positive, I changed the function to avoid it. * health_connection: dead code Remove dead code from the code. * health_connection: Memory Leak Remove memory leak when clean simple pattern * health_connection: Script format With this commit I am formatting the last message to return for the default color on terminal * health_connection: Script format 2 With this commit I am formatting the last message to return for the default color on terminal * health_connection: Script format 3 With this commit I am formatting the error message to return for the default color on terminal
-rw-r--r--.gitignore1
-rw-r--r--database/rrd.h3
-rw-r--r--database/rrdcalc.c191
-rw-r--r--database/rrdcalc.h11
-rw-r--r--database/rrdcalctemplate.c36
-rw-r--r--database/rrdcalctemplate.h8
-rw-r--r--database/rrddim.c35
-rw-r--r--database/rrdhost.c14
-rw-r--r--health/README.md44
-rw-r--r--health/health.c23
-rw-r--r--health/health.h3
-rw-r--r--health/health_config.c183
-rw-r--r--tests/Makefile.am3
-rw-r--r--tests/template_dimension/system_cpu.conf.alarm_foreach8
-rw-r--r--tests/template_dimension/system_cpu.conf.alarm_foreach_sp8
-rw-r--r--tests/template_dimension/system_cpu.conf.template_alarm26
-rw-r--r--tests/template_dimension/system_cpu.conf.template_foreach8
-rw-r--r--tests/template_dimension/system_cpu.conf.template_foreach_sp8
-rw-r--r--tests/template_dimension/system_cpu.conf.unique_alarm26
-rw-r--r--tests/template_dimension/template_dim.sh.in88
20 files changed, 636 insertions, 91 deletions
diff --git a/.gitignore b/.gitignore
index 487ea72a39..afb0c67bd8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,6 +170,7 @@ sitespeed-result/
tests/acls/acl.sh
tests/urls/request.sh
tests/alarm_repetition/alarm.sh
+tests/template_dimension/template_dim.sh
# tests and temp files
python.d/python-modules-installer.sh
diff --git a/database/rrd.h b/database/rrd.h
index 39e881252d..e335f0dd0b 100644
--- a/database/rrd.h
+++ b/database/rrd.h
@@ -697,6 +697,7 @@ struct rrdhost {
// RRDCALCs may be linked to charts at any point
// (charts may or may not exist when these are loaded)
RRDCALC *alarms;
+ RRDCALC *alarms_with_foreach;
avl_tree_lock alarms_idx_health_log;
avl_tree_lock alarms_idx_name;
@@ -709,6 +710,7 @@ struct rrdhost {
// these are used to create alarms when charts
// are created or renamed, that match them
RRDCALCTEMPLATE *templates;
+ RRDCALCTEMPLATE *alarms_template_with_foreach;
// ------------------------------------------------------------------------
@@ -1008,6 +1010,7 @@ static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) {
// ----------------------------------------------------------------------------
// RRD DIMENSION functions
+extern void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host);
extern RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode);
#define rrddim_add(st, id, name, multiplier, divisor, algorithm) rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode)
diff --git a/database/rrdcalc.c b/database/rrdcalc.c
index 908fc2ebfd..2872fa868b 100644
--- a/database/rrdcalc.c
+++ b/database/rrdcalc.c
@@ -255,6 +255,53 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch
return host->health_log.next_alarm_id++;
}
+/**
+ * Alarm name with dimension
+ *
+ * Build a new alarm name by joining the alarm name and a dimension name
+ * with an underscore, e.g. "dim_template" + "user" -> "dim_template_user".
+ *
+ * @param name the alarm name; it is only read, never modified or released here.
+ * @param namelen the number of bytes of `name` to copy (terminator excluded).
+ * @param dim the dimension of the chart.
+ * @param dimlen the number of bytes of `dim` to copy (terminator excluded).
+ *
+ * @return It returns a newly allocated, NUL-terminated string owned by the caller,
+ *         or NULL when the allocation fails.
+ */
+char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen) {
+    // name + '_' + dim + '\0'
+    char *newname = malloc(namelen + dimlen + 2);
+
+    // Never hand `name` back on failure: the caller in rrddim.c releases or keeps the
+    // returned pointer as its own, which would free or alias the parent alarm's name.
+    if(!newname)
+        return NULL;
+
+    char *move = newname;
+    memcpy(move, name, namelen);
+    move += namelen;
+
+    *move++ = '_';
+    memcpy(move, dim, dimlen);
+    move += dimlen;
+    *move = '\0';
+
+    return newname;
+}
+
+
+/**
+ * Remove pipe comma
+ *
+ * Walk the string in place and overwrite every '|' and ',' with a space.
+ * All other characters, and the length of the string, are left untouched.
+ *
+ * @param str the NUL-terminated string to change.
+ */
+void dimension_remove_pipe_comma(char *str) {
+    for(char *cursor = str; *cursor != '\0'; cursor++) {
+        switch(*cursor) {
+            case '|':
+            case ',':
+                *cursor = ' ';
+                break;
+
+            default:
+                break;
+        }
+    }
+}
+
+
inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) {
rrdhost_check_rdlock(host);
@@ -282,24 +329,39 @@ inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) {
rc->critical->rrdcalc = rc;
}
- // link it to the host
- if(likely(host->alarms)) {
- // append it
- RRDCALC *t;
- for(t = host->alarms; t && t->next ; t = t->next) ;
- t->next = rc;
- }
- else {
- host->alarms = rc;
- }
+ if(!rc->foreachdim) {
+ // link it to the host alarms list
+ if(likely(host->alarms)) {
+ // append it
+ RRDCALC *t;
+ for(t = host->alarms; t && t->next ; t = t->next) ;
+ t->next = rc;
+ }
+ else {
+ host->alarms = rc;
+ }
- // link it to its chart
- RRDSET *st;
- rrdset_foreach_read(st, host) {
- if(rrdcalc_is_matching_this_rrdset(rc, st)) {
- rrdsetcalc_link(st, rc);
- break;
+ // link it to its chart
+ RRDSET *st;
+ rrdset_foreach_read(st, host) {
+ if(rrdcalc_is_matching_this_rrdset(rc, st)) {
+ rrdsetcalc_link(st, rc);
+ break;
+ }
}
+ } else {
+ //link it case there is a foreach
+ if(likely(host->alarms_with_foreach)) {
+ // append it
+ RRDCALC *t;
+ for(t = host->alarms_with_foreach; t && t->next ; t = t->next) ;
+ t->next = rc;
+ }
+ else {
+ host->alarms_with_foreach = rc;
+ }
+
+ // this alarm is not linked to a chart here; its per-dimension children are linked when they are created
}
}
@@ -311,13 +373,19 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
RRDCALC *rc = callocz(1, sizeof(RRDCALC));
rc->next_event_id = 1;
- rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id);
rc->name = strdupz(rt->name);
rc->hash = simple_hash(rc->name);
rc->chart = strdupz(chart);
rc->hash_chart = simple_hash(rc->chart);
+ rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id);
+
if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions);
+ if(rt->foreachdim) {
+ rc->foreachdim = strdupz(rt->foreachdim);
+ rc->spdim = health_pattern_from_foreach(rc->foreachdim);
+ }
+ rc->foreachcounter = rt->foreachcounter;
rc->green = rt->green;
rc->red = rt->red;
@@ -361,7 +429,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source);
}
- debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
+ debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u",
(rc->chart)?rc->chart:"NOCHART",
rc->name,
(rc->exec)?rc->exec:"DEFAULT",
@@ -373,6 +441,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
rc->before,
rc->options,
(rc->dimensions)?rc->dimensions:"NONE",
+ (rc->foreachdim)?rc->foreachdim:"NONE",
rc->update_every,
(rc->calculation)?rc->calculation->parsed_as:"NONE",
(rc->warning)?rc->warning->parsed_as:"NONE",
@@ -387,18 +456,94 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt,
);
rrdcalc_add_to_host(host, rc);
- RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc);
- if (rdcmp != rc) {
- error("Cannot insert the alarm index ID %s",rc->name);
+ if(!rt->foreachdim) {
+ RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc);
+ if (rdcmp != rc) {
+ error("Cannot insert the alarm index ID %s",rc->name);
+ }
}
return rc;
}
+/**
+ * Create from RRDCALC
+ *
+ * Create a new alarm using another alarm as template. Scalar settings are copied,
+ * strings are duplicated and the calculation/warning/critical expressions are
+ * parsed again from the source text of the template alarm.
+ *
+ * The new alarm is only allocated and filled here; it is not added to any host list or index.
+ *
+ * @param rc is the alarm that will be used as source; its foreachcounter is incremented.
+ * @param host is the host structure.
+ * @param name is the name of the new alarm. The pointer is stored as-is (it is not copied),
+ *             so it must stay valid for as long as the returned alarm uses it.
+ * @param dimension is the current dimension; it becomes the `dimensions` of the new alarm.
+ *
+ * @return it returns the new alarm.
+ */
+inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension) {
+    RRDCALC *newrc = callocz(1, sizeof(RRDCALC));
+
+    newrc->next_event_id = 1;
+    // Hand over the event counter of the NEW alarm, as rrdcalc_create_from_template() does,
+    // so the lookup never touches the counter of the source alarm on behalf of the child.
+    newrc->id = rrdcalc_get_unique_id(host, rc->chart, name, &newrc->next_event_id);
+    newrc->name = (char *)name;
+    newrc->hash = simple_hash(newrc->name);
+    newrc->chart = strdupz(rc->chart);
+    newrc->hash_chart = simple_hash(rc->chart);
+
+    // the child watches exactly one dimension and is not a foreach alarm itself
+    newrc->dimensions = strdupz(dimension);
+    newrc->foreachdim = NULL;
+    rc->foreachcounter++;
+    newrc->foreachcounter = rc->foreachcounter;
+
+    newrc->green = rc->green;
+    newrc->red = rc->red;
+    newrc->value = NAN;
+    newrc->old_value = NAN;
+
+    newrc->delay_up_duration = rc->delay_up_duration;
+    newrc->delay_down_duration = rc->delay_down_duration;
+    newrc->delay_max_duration = rc->delay_max_duration;
+    newrc->delay_multiplier = rc->delay_multiplier;
+
+    newrc->last_repeat = 0;
+    newrc->warn_repeat_every = rc->warn_repeat_every;
+    newrc->crit_repeat_every = rc->crit_repeat_every;
+
+    newrc->group = rc->group;
+    newrc->after = rc->after;
+    newrc->before = rc->before;
+    newrc->update_every = rc->update_every;
+    newrc->options = rc->options;
+
+    if(rc->exec) newrc->exec = strdupz(rc->exec);
+    if(rc->recipient) newrc->recipient = strdupz(rc->recipient);
+    if(rc->source) newrc->source = strdupz(rc->source);
+    if(rc->units) newrc->units = strdupz(rc->units);
+    if(rc->info) newrc->info = strdupz(rc->info);
+
+    // expressions are not shared with the source alarm: each one is parsed again from its source text
+    if(rc->calculation) {
+        newrc->calculation = expression_parse(rc->calculation->source, NULL, NULL);
+        if(!newrc->calculation)
+            error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rc->chart, rc->name, rc->calculation->source);
+    }
+
+    if(rc->warning) {
+        newrc->warning = expression_parse(rc->warning->source, NULL, NULL);
+        if(!newrc->warning)
+            error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rc->chart, rc->name, rc->warning->source);
+    }
+
+    if(rc->critical) {
+        newrc->critical = expression_parse(rc->critical->source, NULL, NULL);
+        if(!newrc->critical)
+            error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rc->chart, rc->name, rc->critical->source);
+    }
+
+    return newrc;
+}
+
+
void rrdcalc_free(RRDCALC *rc) {
if(unlikely(!rc)) return;
-
expression_free(rc->calculation);
expression_free(rc->warning);
expression_free(rc->critical);
@@ -407,11 +552,13 @@ void rrdcalc_free(RRDCALC *rc) {
freez(rc->chart);
freez(rc->family);
freez(rc->dimensions);
+ freez(rc->foreachdim);
freez(rc->exec);
freez(rc->recipient);
freez(rc->source);
freez(rc->units);
freez(rc->info);
+ simple_pattern_free(rc->spdim);
freez(rc);
}
diff --git a/database/rrdcalc.h b/database/rrdcalc.h
index f0c34b5439..e0b6325971 100644
--- a/database/rrdcalc.h
+++ b/database/rrdcalc.h
@@ -37,7 +37,7 @@ struct rrdcalc {
uint32_t next_event_id; // the next event id that will be used for this alarm
char *name; // the name of this alarm
- uint32_t hash;
+ uint32_t hash; // the hash of the alarm name
char *exec; // the command to execute when this alarm switches state
char *recipient; // the recipient of the alarm (the first parameter to exec)
@@ -59,7 +59,11 @@ struct rrdcalc {
// database lookup settings
char *dimensions; // the chart dimensions
- RRDR_GROUPING group; // grouping method: average, max, etc.
+ char *foreachdim; // the group of dimensions that the `foreach` will be applied.
+ SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart.
+ int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the
+ // children
+ RRDR_GROUPING group; // grouping method: average, max, etc.
int before; // ending point in time-series
int after; // starting point in time-series
uint32_t options; // calculation options
@@ -148,7 +152,10 @@ extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc);
extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name);
extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id);
extern RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart);
+extern RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension);
extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc);
+extern void dimension_remove_pipe_comma(char *str);
+extern char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen);
static inline int rrdcalc_isrepeating(RRDCALC *rc) {
if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) {
diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c
index f2b9767c6d..f7a0855611 100644
--- a/database/rrdcalctemplate.c
+++ b/database/rrdcalctemplate.c
@@ -5,23 +5,35 @@
// ----------------------------------------------------------------------------
// RRDCALCTEMPLATE management
+/**
+ * RRDCALC TEMPLATE LINK MATCHING
+ *
+ * Create an alarm from the template for the given chart, but only when the chart context
+ * equals the template context and the chart family matches the template family pattern
+ * (a template without family pattern accepts every family).
+ *
+ * @param rt is the template used to create the alarm.
+ * @param st is the chart where the alarm will be attached.
+ * @param host is the host that owns the chart; the alarm is created on it.
+ */
+void rrdcalctemplate_link_matching_test(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host ) {
+    // compare the hashes first, so strcmp() only runs for candidates
+    if(rt->hash_context != st->hash_context || strcmp(rt->context, st->context))
+        return;
+
+    if(rt->family_pattern && !simple_pattern_matches(rt->family_pattern, st->family))
+        return;
+
+    RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id);
+    if(unlikely(!rc)) {
+        info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname);
+        return;
+    }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    // An alarm created from a template with foreachdim is not linked to the chart at this
+    // point, so the consistency check only applies to alarms without foreachdim.
+    if(rc->rrdset != st && !rc->foreachdim)
+        error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id);
+#endif
+}
void rrdcalctemplate_link_matching(RRDSET *st) {
RRDHOST *host = st->rrdhost;
RRDCALCTEMPLATE *rt;
for(rt = host->templates; rt ; rt = rt->next) {
- if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context)
- && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) {
- RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id);
- if(unlikely(!rc))
- info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname);
+ rrdcalctemplate_link_matching_test(rt, st, host);
+ }
-#ifdef NETDATA_INTERNAL_CHECKS
- else if(rc->rrdset != st)
- error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id);
-#endif
- }
+ for(rt = host->alarms_template_with_foreach; rt ; rt = rt->next) {
+ rrdcalctemplate_link_matching_test(rt, st, host);
}
}
@@ -43,6 +55,8 @@ inline void rrdcalctemplate_free(RRDCALCTEMPLATE *rt) {
freez(rt->units);
freez(rt->info);
freez(rt->dimensions);
+ freez(rt->foreachdim);
+ simple_pattern_free(rt->spdim);
freez(rt);
}
@@ -67,5 +81,3 @@ inline void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt)
rrdcalctemplate_free(rt);
}
-
-
diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h
index 92bb4138e2..676b4cf645 100644
--- a/database/rrdcalctemplate.h
+++ b/database/rrdcalctemplate.h
@@ -35,7 +35,11 @@ struct rrdcalctemplate {
// database lookup settings
char *dimensions; // the chart dimensions
- RRDR_GROUPING group; // grouping method: average, max, etc.
+ char *foreachdim; // the group of dimensions that the lookup will be applied.
+ SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart.
+ int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the
+ // children
+ RRDR_GROUPING group; // grouping method: average, max, etc.
int before; // ending point in time-series
int after; // starting point in time-series
uint32_t options; // calculation options
@@ -70,5 +74,5 @@ extern void rrdcalctemplate_link_matching(RRDSET *st);
extern void rrdcalctemplate_free(RRDCALCTEMPLATE *rt);
extern void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt);
-
+extern void rrdcalctemplate_create_alarms(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st);
#endif //NETDATA_RRDCALCTEMPLATE_H
diff --git a/database/rrddim.c b/database/rrddim.c
index 019ca34a19..3600a7744e 100644
--- a/database/rrddim.c
+++ b/database/rrddim.c
@@ -156,6 +156,35 @@ static time_t rrddim_query_oldest_time(RRDDIM *rd) {
// ----------------------------------------------------------------------------
// RRDDIM create a dimension
+/**
+ * Link foreach alarms to a dimension
+ *
+ * Walk host->alarms_with_foreach and, for every alarm whose dimension pattern matches the
+ * id or the name of the dimension and whose chart equals the chart name, create a child
+ * alarm named "<alarm name>_<dimension name>". Nothing is created when an alarm with that
+ * name is already reported by rrdcalc_exists() for the chart.
+ *
+ * The child is added to the host and inserted in host->alarms_idx_health_log.
+ *
+ * @param rd is the dimension to test against the foreach alarms.
+ * @param st is the chart that owns the dimension.
+ * @param host is the host that owns the chart and the alarms.
+ */
+void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) {
+    RRDCALC *rrdc;
+    for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) {
+        if (!simple_pattern_matches(rrdc->spdim, rd->id) && !simple_pattern_matches(rrdc->spdim, rd->name))
+            continue;
+
+        // NOTE(review): only st->name is compared here, while alarms created from templates
+        // store st->id as their chart - confirm whether st->id should be accepted too.
+        if (strcmp(rrdc->chart, st->name))
+            continue;
+
+        char *usename = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name));
+        if (!usename)
+            continue;
+
+        if(rrdcalc_exists(host, st->name, usename, 0, 0)){
+            freez(usename);
+            continue;
+        }
+
+        // on success the child keeps the `usename` pointer as its name
+        RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, usename, rd->name);
+        if (!child) {
+            error("Cannot allocate a new alarm.");
+            rrdc->foreachcounter--;
+            // nobody took over the name, release it instead of leaking it
+            freez(usename);
+            continue;
+        }
+
+        rrdcalc_add_to_host(host, child);
+        RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)child);
+        if (rdcmp != child) {
+            error("Cannot insert the alarm index ID %s",child->name);
+        }
+    }
+}
+
+
RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) {
rrdset_wrlock(st);
@@ -371,6 +400,12 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte
if(unlikely(rrddim_index_add(st, rd) != rd))
error("RRDDIM: INTERNAL ERROR: attempt to index duplicate dimension '%s' on chart '%s'", rd->id, st->id);
+ if(host->alarms_with_foreach || host->alarms_template_with_foreach) {
+ rrdhost_wrlock(host);
+ rrdcalc_link_to_rrddim(rd, st, host);
+
+ rrdhost_unlock(host);
+ }
rrdset_unlock(st);
return(rd);
}
diff --git a/database/rrdhost.c b/database/rrdhost.c
index d6252d2065..9075787b05 100644
--- a/database/rrdhost.c
+++ b/database/rrdhost.c
@@ -598,9 +598,23 @@ void rrdhost_free(RRDHOST *host) {
while(host->alarms)
rrdcalc_unlink_and_free(host, host->alarms);
+ RRDCALC *rc,*nc;
+ for(rc = host->alarms_with_foreach; rc ; rc = nc) {
+ nc = rc->next;
+ rrdcalc_free(rc);
+ }
+ host->alarms_with_foreach = NULL;
+
while(host->templates)
rrdcalctemplate_unlink_and_free(host, host->templates);
+ RRDCALCTEMPLATE *rt,*next;
+ for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
+ next = rt->next;
+ rrdcalctemplate_free(rt);
+ }
+ host->alarms_template_with_foreach = NULL;
+
debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname);
rrdvar_free_remaining_variables(host, &host->rrdvar_root_index);
diff --git a/health/README.md b/health/README.md
index ab8d6882a1..0ffbbdb51d 100644
--- a/health/README.md
+++ b/health/README.md
@@ -163,7 +163,7 @@ This line makes a database lookup to find a value. This result of this lookup is
The format is:
```
-lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS]
+lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS] [foreach DIMENSIONS]
```
Everything is the same with [badges](../web/api/badges/). In short:
@@ -190,6 +190,11 @@ Everything is the same with [badges](../web/api/badges/). In short:
have spaces in their names). This accepts Netdata simple patterns and the `match-ids` and
`match-names` options affect the searches for dimensions.
+- `foreach DIMENSIONS` is optional, will always be the last parameter, and uses the same `,`/`|`
+ rules as the `of` parameter. Each dimension you specify in `foreach` will use the same rule
+ to trigger an alarm. If you set both `of` and `foreach`, Netdata will ignore the `of` parameter
+ and replace it with one of the dimensions you gave to `foreach`.
+
The result of the lookup will be available as `$this` and `$NAME` in expressions.
The timestamps of the timeframe evaluated by the database lookup is available as variables
`$after` and `$before` (both are unix timestamps).
@@ -660,6 +665,43 @@ Note that the drops chart does not exist if a network interface has never droppe
When Netdata detects a dropped packet, it will add the chart and it will automatically attach this
alarm to it.
+### Example 5
+
+Check if user or system dimension is using more than 50% of cpu:
+
+```
+ alarm: dim_template
+ on: system.cpu
+ os: linux
+lookup: average -3s percentage foreach system,user
+ units: %
+ every: 10s
+ warn: $this > 50
+ crit: $this > 80
+```
+
+The `lookup` line will calculate the average CPU usage from system and user in the last 3 seconds. Because we have
+the foreach in the `lookup` line, Netdata will create two independent alarms called `dim_template_system`
+and `dim_template_user` that will have all the other parameters shared among them.
+
+### Example 6
+
+Check if any dimension is using more than 50% of cpu:
+
+```
+ alarm: dim_template
+ on: system.cpu
+ os: linux
+lookup: average -3s percentage foreach *
+ units: %
+ every: 10s
+ warn: $this > 50
+ crit: $this > 80
+```
+
+The `lookup` line will calculate the average CPU usage of each dimension in the last 3 seconds. Because the
+`foreach` pattern is `*`, Netdata will create one independent alarm for every dimension of the chart.
+
## Troubleshooting
You can compile Netdata with [debugging](../daemon#debugging) and then set in `netdata.conf`:
diff --git a/health/health.c b/health/health.c
index 592e6a5be2..329191fb88 100644
--- a/health/health.c
+++ b/health/health.c
@@ -113,9 +113,23 @@ void health_reload_host(RRDHOST *host) {
while(host->templates)
rrdcalctemplate_unlink_and_free(host, host->templates);
+ RRDCALCTEMPLATE *rt,*next;
+ for(rt = host->alarms_template_with_foreach; rt ; rt = next) {
+ next = rt->next;
+ rrdcalctemplate_free(rt);
+ }
+ host->alarms_template_with_foreach = NULL;
+
while(host->alarms)
rrdcalc_unlink_and_free(host, host->alarms);
+ RRDCALC *rc,*nc;
+ for(rc = host->alarms_with_foreach; rc ; rc = nc) {
+ nc = rc->next;
+ rrdcalc_free(rc);
+ }
+ host->alarms_with_foreach = NULL;
+
rrdhost_unlock(host);
// invalidate all previous entries in the alarm log
@@ -139,9 +153,17 @@ void health_reload_host(RRDHOST *host) {
health_readdir(host, user_path, stock_path, NULL);
// link the loaded alarms to their charts
+ RRDDIM *rd;
rrdset_foreach_write(st, host) {
rrdsetcalc_link_matching(st);
rrdcalctemplate_link_matching(st);
+
+ // This loop must run last, because `rrdcalctemplate_link_matching` creates the foreach alarms that it links to the dimensions.
+ rrdset_rdlock(st);
+ rrddim_foreach_read(rd, st) {
+ rrdcalc_link_to_rrddim(rd, st, host);
+ }
+ rrdset_unlock(st);
}
rrdhost_unlock(host);
@@ -888,6 +910,7 @@ void *health_main(void *ptr) {
}
}
}
+
if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) {
rc->last_repeat = now;
ALARM_ENTRY *ae = health_create_alarm_entry(
diff --git a/health/health.h b/health/health.h
index 8e4d0f7cb3..ab367e9033 100644
--- a/health/health.h
+++ b/health/health.h
@@ -48,6 +48,7 @@ extern unsigned int default_health_enabled;
#define HEALTH_INFO_KEY "info"
#define HEALTH_DELAY_KEY "delay"
#define HEALTH_OPTIONS_KEY "options"
+#define HEALTH_FOREACH_KEY "foreach"
#define HEALTH_SILENCERS_MAX_FILE_LEN 10000
@@ -106,4 +107,6 @@ extern void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae);
extern void *health_cmdapi_thread(void *ptr);
+extern SIMPLE_PATTERN *health_pattern_from_foreach(char *s);
+
#endif //NETDATA_HEALTH_H
diff --git a/health/health_config.c b/health/health_config.c
index 0d6e77a9e4..65c6d8bd7f 100644
--- a/health/health_config.c
+