diff options
author | thiagoftsm <thiagoftsm@gmail.com> | 2019-09-27 12:24:54 +0000 |
---|---|---|
committer | Chris Akritidis <43294513+cakrit@users.noreply.github.com> | 2019-09-27 14:24:54 +0200 |
commit | e3471fa5727bcf286dd3b52ec0cdecd8fdf7067e (patch) | |
tree | f2fa1e28143ecdae85536e8488965a7be6c7bfe2 | |
parent | a8b28bfbd2fe5a1814e6ddbb211961158f221fda (diff) |
Create a template for all dimensions (#6560)
* health_connection: Comments inside Health Config
To better understand what needs to change inside health, and where to change it,
I commented the functions inside this file.
* health_connection: Comments about Health in other files
This commit brings the rest of the comments that were missing for health.
* health_connection: Comments on health_log
I had to append more comments on health_log
* health_connection: Create a new variable
New variable is created to work with foreach
* health_connection: Fix new option and doc
The first implementation of 'foreach' had a problem; this commit fixes the error.
This commit also brings the updates for the documentation
* health_connection: Understanding health
This commit saves the point where I am working; it has the map to understand the whole alarm process
* health_connection: Update map
I changed the position of the error message to identify the correct place to add new alarms
* health_connection: End of simple alarm
This commit finishes what is necessary to apply the same lookup to different dimensions in a single line
* health_connection: Documentation and template steps
This commit brings the documentation that was missing for templates, and comments to help in the next
step of applying a template to create an alarm.
* health_connection: Restoring
After some tests, it was detected that the alarms were not working as expected
* health_connection: Fix bug and bring dimension to template
This commit brings a fix for an old Netdata bug: before this, Netdata always tried to create
a new entry in an index with the same id, raising an error.
It also brings the possibility to use 'foreach' in template
* health_connection: Fix cmake compilation
There was a problem with cmake compilation fixed by this commit
* health_connection: shell script
Finalize the shell script to test the PR
* health_connection: Remove debug message
During development, I used some messages to understand the code;
this commit removes the last message
* health_connection: Fix bugs
This commit fixes bugs reported by tests
* health_connection: Alarm working
This commit brings the necessary change for the alarms to work, but it is missing the unlink from the newest list
* health_connection: Template code written
This commit finishes the creation of alarm from template, but it was not tested yet.
* health_connection: Remove comments
I am removing the comments from this PR to bring them back later
* health_connection: Remove lines
Another commit to restore the files to their state before they were commented
* health_connection: New alarm and remove messages
I am bringing a new alarm to test template with SP and removing comments used during the development
* health_connection: Functional test review
After reviewing the functional test script, small adjustments were necessary to
test all the features available with the new version
* health_connection: Free structure
I am moving the free list to the correct place; the previous place was not safe
* health_connection: ShellCheck
This commit fixes the problems with shellcheck
* health_connection: Fix hash
This commit fixes the hash calculation that was using the wrong input
* health_connection: Fix message error
The system was showing a wrong message, because when we have foreach
the alarm created from a template is added to the index in a second stage
* health_connection: Fix documentation
In this commit I am fixing the grammar of the previous doc and bringing
two examples
* health_connection: Fix examples
This commit fixes the last two examples that were brought in this PR
* health_connection: Fix example doc
When I brought the correct grammar in the last commit, I lost a mark
* health_connection: Grammar fix
Fixing grammar of the documentation
* health_connection: Memory leak
This commit fixes the memory leak that was present in the PR
* health_connection: Reload
This commit fixes the problem that the alarms were not linked after
receiving a SIGUSR2
* health_connection: False Positive from codacy
Codacy was giving a false positive; I changed the function to avoid it.
* health_connection: dead code
Remove dead code from the code.
* health_connection: Memory Leak
Remove memory leak when cleaning a simple pattern
* health_connection: Script format
With this commit I am formatting the last message to return
to the default color on the terminal
* health_connection: Script format 2
With this commit I am formatting the last message to return
to the default color on the terminal
* health_connection: Script format 3
With this commit I am formatting the error message to return
to the default color on the terminal
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | database/rrd.h | 3 | ||||
-rw-r--r-- | database/rrdcalc.c | 191 | ||||
-rw-r--r-- | database/rrdcalc.h | 11 | ||||
-rw-r--r-- | database/rrdcalctemplate.c | 36 | ||||
-rw-r--r-- | database/rrdcalctemplate.h | 8 | ||||
-rw-r--r-- | database/rrddim.c | 35 | ||||
-rw-r--r-- | database/rrdhost.c | 14 | ||||
-rw-r--r-- | health/README.md | 44 | ||||
-rw-r--r-- | health/health.c | 23 | ||||
-rw-r--r-- | health/health.h | 3 | ||||
-rw-r--r-- | health/health_config.c | 183 | ||||
-rw-r--r-- | tests/Makefile.am | 3 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.alarm_foreach | 8 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.alarm_foreach_sp | 8 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.template_alarm | 26 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.template_foreach | 8 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.template_foreach_sp | 8 | ||||
-rw-r--r-- | tests/template_dimension/system_cpu.conf.unique_alarm | 26 | ||||
-rw-r--r-- | tests/template_dimension/template_dim.sh.in | 88 |
20 files changed, 636 insertions, 91 deletions
diff --git a/.gitignore b/.gitignore index 487ea72a39..afb0c67bd8 100644 --- a/.gitignore +++ b/.gitignore @@ -170,6 +170,7 @@ sitespeed-result/ tests/acls/acl.sh tests/urls/request.sh tests/alarm_repetition/alarm.sh +tests/template_dimension/template_dim.sh # tests and temp files python.d/python-modules-installer.sh diff --git a/database/rrd.h b/database/rrd.h index 39e881252d..e335f0dd0b 100644 --- a/database/rrd.h +++ b/database/rrd.h @@ -697,6 +697,7 @@ struct rrdhost { // RRDCALCs may be linked to charts at any point // (charts may or may not exist when these are loaded) RRDCALC *alarms; + RRDCALC *alarms_with_foreach; avl_tree_lock alarms_idx_health_log; avl_tree_lock alarms_idx_name; @@ -709,6 +710,7 @@ struct rrdhost { // these are used to create alarms when charts // are created or renamed, that match them RRDCALCTEMPLATE *templates; + RRDCALCTEMPLATE *alarms_template_with_foreach; // ------------------------------------------------------------------------ @@ -1008,6 +1010,7 @@ static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) { // ---------------------------------------------------------------------------- // RRD DIMENSION functions +extern void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host); extern RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode); #define rrddim_add(st, id, name, multiplier, divisor, algorithm) rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode) diff --git a/database/rrdcalc.c b/database/rrdcalc.c index 908fc2ebfd..2872fa868b 100644 --- a/database/rrdcalc.c +++ b/database/rrdcalc.c @@ -255,6 +255,53 @@ inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const ch return host->health_log.next_alarm_id++; } +/** + * Alarm name with dimension + * + * Change the name of the current alarm appending a new diagram. 
+ * + * @param name the alarm name + * @param namelen is the length of the previous vector. + * @param dim the dimension of the chart. + * @param dimlen is the length of the previous vector. + * + * @return It returns the new name on success and the old otherwise + */ +char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen) { + char *newname,*move; + + newname = malloc(namelen + dimlen + 2); + if(newname) { + move = newname; + memcpy(move, name, namelen); + move += namelen; + + *move++ = '_'; + memcpy(move, dim, dimlen); + move += dimlen; + *move = '\0'; + } else { + newname = name; + } + + return newname; +} + +/** + * Remove pipe comma + * + * Remove the pipes and commas converting to space. + * + * @param str the string to change. + */ +void dimension_remove_pipe_comma(char *str) { + while(*str) { + if(*str == '|' || *str == ',') *str = ' '; + + str++; + } +} + inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { rrdhost_check_rdlock(host); @@ -282,24 +329,39 @@ inline void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc) { rc->critical->rrdcalc = rc; } - // link it to the host - if(likely(host->alarms)) { - // append it - RRDCALC *t; - for(t = host->alarms; t && t->next ; t = t->next) ; - t->next = rc; - } - else { - host->alarms = rc; - } + if(!rc->foreachdim) { + // link it to the host alarms list + if(likely(host->alarms)) { + // append it + RRDCALC *t; + for(t = host->alarms; t && t->next ; t = t->next) ; + t->next = rc; + } + else { + host->alarms = rc; + } - // link it to its chart - RRDSET *st; - rrdset_foreach_read(st, host) { - if(rrdcalc_is_matching_this_rrdset(rc, st)) { - rrdsetcalc_link(st, rc); - break; + // link it to its chart + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdcalc_is_matching_this_rrdset(rc, st)) { + rrdsetcalc_link(st, rc); + break; + } } + } else { + //link it case there is a foreach + if(likely(host->alarms_with_foreach)) { + // append it + RRDCALC *t; + for(t = 
host->alarms_with_foreach; t && t->next ; t = t->next) ; + t->next = rc; + } + else { + host->alarms_with_foreach = rc; + } + + //I am not linking this alarm direct to the host here, this will be done when the children is created } } @@ -311,13 +373,19 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDCALC *rc = callocz(1, sizeof(RRDCALC)); rc->next_event_id = 1; - rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id); rc->name = strdupz(rt->name); rc->hash = simple_hash(rc->name); rc->chart = strdupz(chart); rc->hash_chart = simple_hash(rc->chart); + rc->id = rrdcalc_get_unique_id(host, rc->chart, rc->name, &rc->next_event_id); + if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); + if(rt->foreachdim) { + rc->foreachdim = strdupz(rt->foreachdim); + rc->spdim = health_pattern_from_foreach(rc->foreachdim); + } + rc->foreachcounter = rt->foreachcounter; rc->green = rt->green; rc->red = rt->red; @@ -361,7 +429,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); } - debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, warn_repeat_every %u, crit_repeat_every %u", + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', for each dimension '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f, 
warn_repeat_every %u, crit_repeat_every %u", (rc->chart)?rc->chart:"NOCHART", rc->name, (rc->exec)?rc->exec:"DEFAULT", @@ -373,6 +441,7 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, rc->before, rc->options, (rc->dimensions)?rc->dimensions:"NONE", + (rc->foreachdim)?rc->foreachdim:"NONE", rc->update_every, (rc->calculation)?rc->calculation->parsed_as:"NONE", (rc->warning)?rc->warning->parsed_as:"NONE", @@ -387,18 +456,94 @@ inline RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, ); rrdcalc_add_to_host(host, rc); - RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc); - if (rdcmp != rc) { - error("Cannot insert the alarm index ID %s",rc->name); + if(!rt->foreachdim) { + RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)rc); + if (rdcmp != rc) { + error("Cannot insert the alarm index ID %s",rc->name); + } } return rc; } +/** + * Create from RRDCALC + * + * Create a new alarm using another alarm as template. + * + * @param rc is the alarm that will be used as source + * @param host is the host structure. + * @param name is the newest chart name. + * @param dimension is the current dimension + * @param foreachdim the whole list of dimension + * + * @return it returns the new alarm changed. 
+ */ +inline RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension) { + RRDCALC *newrc = callocz(1, sizeof(RRDCALC)); + + newrc->next_event_id = 1; + newrc->id = rrdcalc_get_unique_id(host, rc->chart, name, &rc->next_event_id); + newrc->name = (char *)name; + newrc->hash = simple_hash(newrc->name); + newrc->chart = strdupz(rc->chart); + newrc->hash_chart = simple_hash(rc->chart); + + newrc->dimensions = strdupz(dimension); + newrc->foreachdim = NULL; + rc->foreachcounter++; + newrc->foreachcounter = rc->foreachcounter; + + newrc->green = rc->green; + newrc->red = rc->red; + newrc->value = NAN; + newrc->old_value = NAN; + + newrc->delay_up_duration = rc->delay_up_duration; + newrc->delay_down_duration = rc->delay_down_duration; + newrc->delay_max_duration = rc->delay_max_duration; + newrc->delay_multiplier = rc->delay_multiplier; + + newrc->last_repeat = 0; + newrc->warn_repeat_every = rc->warn_repeat_every; + newrc->crit_repeat_every = rc->crit_repeat_every; + + newrc->group = rc->group; + newrc->after = rc->after; + newrc->before = rc->before; + newrc->update_every = rc->update_every; + newrc->options = rc->options; + + if(rc->exec) newrc->exec = strdupz(rc->exec); + if(rc->recipient) newrc->recipient = strdupz(rc->recipient); + if(rc->source) newrc->source = strdupz(rc->source); + if(rc->units) newrc->units = strdupz(rc->units); + if(rc->info) newrc->info = strdupz(rc->info); + + if(rc->calculation) { + newrc->calculation = expression_parse(rc->calculation->source, NULL, NULL); + if(!newrc->calculation) + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", rc->chart, rc->name, rc->calculation->source); + } + + if(rc->warning) { + newrc->warning = expression_parse(rc->warning->source, NULL, NULL); + if(!newrc->warning) + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", rc->chart, rc->name, rc->warning->source); + } + + if(rc->critical) { + newrc->critical = 
expression_parse(rc->critical->source, NULL, NULL); + if(!newrc->critical) + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", rc->chart, rc->name, rc->critical->source); + } + + return newrc; +} + void rrdcalc_free(RRDCALC *rc) { if(unlikely(!rc)) return; - expression_free(rc->calculation); expression_free(rc->warning); expression_free(rc->critical); @@ -407,11 +552,13 @@ void rrdcalc_free(RRDCALC *rc) { freez(rc->chart); freez(rc->family); freez(rc->dimensions); + freez(rc->foreachdim); freez(rc->exec); freez(rc->recipient); freez(rc->source); freez(rc->units); freez(rc->info); + simple_pattern_free(rc->spdim); freez(rc); } diff --git a/database/rrdcalc.h b/database/rrdcalc.h index f0c34b5439..e0b6325971 100644 --- a/database/rrdcalc.h +++ b/database/rrdcalc.h @@ -37,7 +37,7 @@ struct rrdcalc { uint32_t next_event_id; // the next event id that will be used for this alarm char *name; // the name of this alarm - uint32_t hash; + uint32_t hash; // the hash of the alarm name char *exec; // the command to execute when this alarm switches state char *recipient; // the recipient of the alarm (the first parameter to exec) @@ -59,7 +59,11 @@ struct rrdcalc { // database lookup settings char *dimensions; // the chart dimensions - RRDR_GROUPING group; // grouping method: average, max, etc. + char *foreachdim; // the group of dimensions that the `foreach` will be applied. + SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart. + int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the + // children + RRDR_GROUPING group; // grouping method: average, max, etc. 
int before; // ending point in time-series int after; // starting point in time-series uint32_t options; // calculation options @@ -148,7 +152,10 @@ extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc); extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); extern RRDCALC *rrdcalc_create_from_template(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); +extern RRDCALC *rrdcalc_create_from_rrdcalc(RRDCALC *rc, RRDHOST *host, const char *name, const char *dimension); extern void rrdcalc_add_to_host(RRDHOST *host, RRDCALC *rc); +extern void dimension_remove_pipe_comma(char *str); +extern char *alarm_name_with_dim(char *name, size_t namelen, const char *dim, size_t dimlen); static inline int rrdcalc_isrepeating(RRDCALC *rc) { if (unlikely(rc->warn_repeat_every > 0 || rc->crit_repeat_every > 0)) { diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c index f2b9767c6d..f7a0855611 100644 --- a/database/rrdcalctemplate.c +++ b/database/rrdcalctemplate.c @@ -5,23 +5,35 @@ // ---------------------------------------------------------------------------- // RRDCALCTEMPLATE management +/** + * RRDCALC TEMPLATE LINK MATCHING + * + * @param rt is the template used to create the chart. + * @param st is the chart where the alarm will be attached. 
+ */ +void rrdcalctemplate_link_matching_test(RRDCALCTEMPLATE *rt, RRDSET *st, RRDHOST *host ) { + if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context) + && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) { + RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id); + if(unlikely(!rc)) + info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); +#ifdef NETDATA_INTERNAL_CHECKS + else if(rc->rrdset != st && !rc->foreachdim) //When we have a template with foreadhdim, the child will be added to the index late + error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id); +#endif + } +} void rrdcalctemplate_link_matching(RRDSET *st) { RRDHOST *host = st->rrdhost; RRDCALCTEMPLATE *rt; for(rt = host->templates; rt ; rt = rt->next) { - if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context) - && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) { - RRDCALC *rc = rrdcalc_create_from_template(host, rt, st->id); - if(unlikely(!rc)) - info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); + rrdcalctemplate_link_matching_test(rt, st, host); + } -#ifdef NETDATA_INTERNAL_CHECKS - else if(rc->rrdset != st) - error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id); -#endif - } + for(rt = host->alarms_template_with_foreach; rt ; rt = rt->next) { + rrdcalctemplate_link_matching_test(rt, st, host); } } @@ -43,6 +55,8 @@ inline void rrdcalctemplate_free(RRDCALCTEMPLATE *rt) { freez(rt->units); freez(rt->info); freez(rt->dimensions); + freez(rt->foreachdim); + simple_pattern_free(rt->spdim); freez(rt); } @@ -67,5 +81,3 @@ inline void rrdcalctemplate_unlink_and_free(RRDHOST *host, 
RRDCALCTEMPLATE *rt) rrdcalctemplate_free(rt); } - - diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h index 92bb4138e2..676b4cf645 100644 --- a/database/rrdcalctemplate.h +++ b/database/rrdcalctemplate.h @@ -35,7 +35,11 @@ struct rrdcalctemplate { // database lookup settings char *dimensions; // the chart dimensions - RRDR_GROUPING group; // grouping method: average, max, etc. + char *foreachdim; // the group of dimensions that the lookup will be applied. + SIMPLE_PATTERN *spdim; // used if and only if there is a simple pattern for the chart. + int foreachcounter; // the number of alarms created with foreachdim, this also works as an id of the + // children + RRDR_GROUPING group; // grouping method: average, max, etc. int before; // ending point in time-series int after; // starting point in time-series uint32_t options; // calculation options @@ -70,5 +74,5 @@ extern void rrdcalctemplate_link_matching(RRDSET *st); extern void rrdcalctemplate_free(RRDCALCTEMPLATE *rt); extern void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt); - +extern void rrdcalctemplate_create_alarms(RRDHOST *host, RRDCALCTEMPLATE *rt, RRDSET *st); #endif //NETDATA_RRDCALCTEMPLATE_H diff --git a/database/rrddim.c b/database/rrddim.c index 019ca34a19..3600a7744e 100644 --- a/database/rrddim.c +++ b/database/rrddim.c @@ -156,6 +156,35 @@ static time_t rrddim_query_oldest_time(RRDDIM *rd) { // ---------------------------------------------------------------------------- // RRDDIM create a dimension +void rrdcalc_link_to_rrddim(RRDDIM *rd, RRDSET *st, RRDHOST *host) { + RRDCALC *rrdc; + for (rrdc = host->alarms_with_foreach; rrdc ; rrdc = rrdc->next) { + if (simple_pattern_matches(rrdc->spdim, rd->id) || simple_pattern_matches(rrdc->spdim, rd->name)) { + if (!strcmp(rrdc->chart, st->name)) { + char *usename = alarm_name_with_dim(rrdc->name, strlen(rrdc->name), rd->name, strlen(rd->name)); + if (usename) { + if(rrdcalc_exists(host, st->name, usename, 0, 0)){ 
+ freez(usename); + continue; + } + + RRDCALC *child = rrdcalc_create_from_rrdcalc(rrdc, host, usename, rd->name); + if (child) { + rrdcalc_add_to_host(host, child); + RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_health_log,(avl *)child); + if (rdcmp != child) { + error("Cannot insert the alarm index ID %s",child->name); + } + } else { + error("Cannot allocate a new alarm."); + rrdc->foreachcounter--; + } + } + } + } + } +} + RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) { rrdset_wrlock(st); @@ -371,6 +400,12 @@ RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collecte if(unlikely(rrddim_index_add(st, rd) != rd)) error("RRDDIM: INTERNAL ERROR: attempt to index duplicate dimension '%s' on chart '%s'", rd->id, st->id); + if(host->alarms_with_foreach || host->alarms_template_with_foreach) { + rrdhost_wrlock(host); + rrdcalc_link_to_rrddim(rd, st, host); + + rrdhost_unlock(host); + } rrdset_unlock(st); return(rd); } diff --git a/database/rrdhost.c b/database/rrdhost.c index d6252d2065..9075787b05 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -598,9 +598,23 @@ void rrdhost_free(RRDHOST *host) { while(host->alarms) rrdcalc_unlink_and_free(host, host->alarms); + RRDCALC *rc,*nc; + for(rc = host->alarms_with_foreach; rc ; rc = nc) { + nc = rc->next; + rrdcalc_free(rc); + } + host->alarms_with_foreach = NULL; + while(host->templates) rrdcalctemplate_unlink_and_free(host, host->templates); + RRDCALCTEMPLATE *rt,*next; + for(rt = host->alarms_template_with_foreach; rt ; rt = next) { + next = rt->next; + rrdcalctemplate_free(rt); + } + host->alarms_template_with_foreach = NULL; + debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname); rrdvar_free_remaining_variables(host, &host->rrdvar_root_index); diff --git a/health/README.md b/health/README.md 
index ab8d6882a1..0ffbbdb51d 100644 --- a/health/README.md +++ b/health/README.md @@ -163,7 +163,7 @@ This line makes a database lookup to find a value. This result of this lookup is The format is: ``` -lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS] +lookup: METHOD AFTER [at BEFORE] [every DURATION] [OPTIONS] [of DIMENSIONS] [foreach DIMENSIONS] ``` Everything is the same with [badges](../web/api/badges/). In short: @@ -190,6 +190,11 @@ Everything is the same with [badges](../web/api/badges/). In short: have spaces in their names). This accepts Netdata simple patterns and the `match-ids` and `match-names` options affect the searches for dimensions. +- `foreach DIMENSIONS` is optional, will always be the last parameter, and uses the same `,`/`|` + rules as the `of` parameter. Each dimension you specify in `foreach` will use the same rule + to trigger an alarm. If you set both `of` and `foreach`, Netdata will ignore the `of` parameter + and replace it with one of the dimensions you gave to `foreach`. + The result of the lookup will be available as `$this` and `$NAME` in expressions. The timestamps of the timeframe evaluated by the database lookup is available as variables `$after` and `$before` (both are unix timestamps). @@ -660,6 +665,43 @@ Note that the drops chart does not exist if a network interface has never droppe When Netdata detects a dropped packet, it will add the chart and it will automatically attach this alarm to it. +### Example 5 + +Check if user or system dimension is using more than 50% of cpu: + +``` + alarm: dim_template + on: system.cpu + os: linux +lookup: average -3s percentage foreach system,user + units: % + every: 10s + warn: $this > 50 + crit: $this > 80 +``` + +The `lookup` line will calculate the average CPU usage from system and user in the last 3 seconds. 
Because we have +the foreach in the `lookup` line, Netdata will create two independent alarms called `dim_template_system` +and `dim_template_user` that will have all the other parameters shared among them. + +### Example 6 + +Check if all dimensions are using more than 50% of cpu: + +``` + alarm: dim_template + on: system.cpu + os: linux +lookup: average -3s percentage foreach * + units: % + every: 10s + warn: $this > 50 + crit: $this > 80 +``` + +The `lookup` line will calculate the average of CPU usage from system and user in the last 3 seconds. In this case +Netdata will create alarms for all dimensions of the chart. + ## Troubleshooting You can compile Netdata with [debugging](../daemon#debugging) and then set in `netdata.conf`: diff --git a/health/health.c b/health/health.c index 592e6a5be2..329191fb88 100644 --- a/health/health.c +++ b/health/health.c @@ -113,9 +113,23 @@ void health_reload_host(RRDHOST *host) { while(host->templates) rrdcalctemplate_unlink_and_free(host, host->templates); + RRDCALCTEMPLATE *rt,*next; + for(rt = host->alarms_template_with_foreach; rt ; rt = next) { + next = rt->next; + rrdcalctemplate_free(rt); + } + host->alarms_template_with_foreach = NULL; + while(host->alarms) rrdcalc_unlink_and_free(host, host->alarms); + RRDCALC *rc,*nc; + for(rc = host->alarms_with_foreach; rc ; rc = nc) { + nc = rc->next; + rrdcalc_free(rc); + } + host->alarms_with_foreach = NULL; + rrdhost_unlock(host); // invalidate all previous entries in the alarm log @@ -139,9 +153,17 @@ void health_reload_host(RRDHOST *host) { health_readdir(host, user_path, stock_path, NULL); // link the loaded alarms to their charts + RRDDIM *rd; rrdset_foreach_write(st, host) { rrdsetcalc_link_matching(st); rrdcalctemplate_link_matching(st); + + //This loop must be the last, because ` rrdcalctemplate_link_matching` will create alarms related to it. 
+ rrdset_rdlock(st); + rrddim_foreach_read(rd, st) { + rrdcalc_link_to_rrddim(rd, st, host); + } + rrdset_unlock(st); } rrdhost_unlock(host); @@ -888,6 +910,7 @@ void *health_main(void *ptr) { } } } + if(unlikely(repeat_every > 0 && (rc->last_repeat + repeat_every) <= now)) { rc->last_repeat = now; ALARM_ENTRY *ae = health_create_alarm_entry( diff --git a/health/health.h b/health/health.h index 8e4d0f7cb3..ab367e9033 100644 --- a/health/health.h +++ b/health/health.h @@ -48,6 +48,7 @@ extern unsigned int default_health_enabled; #define HEALTH_INFO_KEY "info" #define HEALTH_DELAY_KEY "delay" #define HEALTH_OPTIONS_KEY "options" +#define HEALTH_FOREACH_KEY "foreach" #define HEALTH_SILENCERS_MAX_FILE_LEN 10000 @@ -106,4 +107,6 @@ extern void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae); extern void *health_cmdapi_thread(void *ptr); +extern SIMPLE_PATTERN *health_pattern_from_foreach(char *s); + #endif //NETDATA_HEALTH_H diff --git a/health/health_config.c b/health/health_config.c index 0d6e77a9e4..65c6d8bd7f 100644 --- a/health/health_config.c + |