summaryrefslogtreecommitdiffstats
path: root/health/health.c
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-03-02 22:50:48 +0200
committerGitHub <noreply@github.com>2023-03-02 22:50:48 +0200
commit021e252fc5d18a7225c0f4c975b3281016861d3c (patch)
tree63f92adc27419ca9df464635cd85424f52c94179 /health/health.c
parentc4d8d35b9f065f2a847f2780acb4342dabdfd34c (diff)
/api/v2/contexts (#14592)
* preparation for /api/v2/contexts * working /api/v2/contexts * add anomaly rate information in all statistics; when sum-count is requested, return sums and counts instead of averages * minor fix * query targegt now accurately counts hosts, contexts, instances, dimensions, metrics * cleanup /api/v2/contexts * full text search with /api/v2/contexts * simple patterns now support the option to search ignoring case * full text search API with /api/v2/q * simple pattern execution optimization * do not show q when not given * full text search accounting * separated /api/v2/nodes from /api/v2/contexts * fix ssv queries for group_by * count query instances queried and failed per context and host * split rrdcontext.c to multiple files * add query totals * fix anomaly rate calculation; provide "ni" for indexing hosts * do not generate zero valued members * faster calculation of anomaly rate; by just summing integers for each db points and doing math once for every generated point * fix typo when printing dimensions totals * added option minify to remove spaces and newlines fron JSON output * send instance ids and names when they differ * do not add in query target dimensions, instances, contexts and hosts for which there is no retention in the current timeframe * fix for the previous + renames and code cleanup * when a dimension is filtered, include in the response all the other dimensions that are selectable * do not add nodes that do not have retention in the current window * move selection of dimensions to query_dimension_add(), instead of query_metric_add() * increase the pre-processing capacity of queries * generate instance fqdn ids and names only when they are needed * provide detailed statistics about tiers retention, queries, points, update_every * late allocation of query dimensions * cleanup * more cleanup * support for annotations per displayed point, RESET and PARTIAL * new type annotations * if a chart is not linked to contexts and it is collected, link it when it is collected * make ML run reentrant * make ML rrdr query synchronous * optimize replication memory allocation of replication_sort_entry * change units to percentage, when requesting a coefficinet of variation, or a percentage query * initialize replication before starting main threads * properly decrement no room requests counter * propagate the non-zero flag to group-by * the same by avoiding the extra loop * respect non-zero in all dimension arrays * remove dictionary garbage collection from dictionary_entries() and dictionary_version() * be more verbose when jv2 indexing is postponed * prevent infinite loop * use hidden dimensions even when dimensions pattern is unset * traverse hosts using dictionaries * fix dictionary unittests
Diffstat (limited to 'health/health.c')
-rw-r--r--health/health.c40
1 files changed, 17 insertions, 23 deletions
diff --git a/health/health.c b/health/health.c
index 1e20b51b70..4218ede6fb 100644
--- a/health/health.c
+++ b/health/health.c
@@ -357,13 +357,11 @@ static void health_reload_host(RRDHOST *host) {
void health_reload(void) {
sql_refresh_hashes();
- rrd_rdlock();
-
RRDHOST *host;
- rrdhost_foreach_read(host)
+ dfe_start_reentrant(rrdhost_root_index, host){
health_reload_host(host);
-
- rrd_unlock();
+ }
+ dfe_done(host);
#ifdef ENABLE_ACLK
if (netdata_cloud_setting) {
@@ -781,7 +779,8 @@ static void initialize_health(RRDHOST *host)
else
host->health_log.max = (unsigned int)n;
- conf_enabled_alarms = simple_pattern_create(config_get(CONFIG_SECTION_HEALTH, "enabled alarms", "*"), NULL, SIMPLE_PATTERN_EXACT);
+ conf_enabled_alarms = simple_pattern_create(config_get(CONFIG_SECTION_HEALTH, "enabled alarms", "*"), NULL,
+ SIMPLE_PATTERN_EXACT, true);
netdata_rwlock_init(&host->health_log.alarm_log_rwlock);
@@ -803,7 +802,7 @@ static void initialize_health(RRDHOST *host)
// link the loaded alarms to their charts
RRDSET *st;
- rrdset_foreach_write(st, host) {
+ rrdset_foreach_reentrant(st, host) {
if (rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED))
continue;
@@ -838,11 +837,11 @@ static SILENCE_TYPE check_silenced(RRDCALC *rc, const char *host, SILENCERS *sil
for (s = silencers->silencers; s!=NULL; s=s->next){
if (
- (!s->alarms_pattern || (rc->name && s->alarms_pattern && simple_pattern_matches(s->alarms_pattern, rrdcalc_name(rc)))) &&
- (!s->contexts_pattern || (rc->rrdset && rc->rrdset->context && s->contexts_pattern && simple_pattern_matches(s->contexts_pattern, rrdset_context(rc->rrdset)))) &&
- (!s->hosts_pattern || (host && s->hosts_pattern && simple_pattern_matches(s->hosts_pattern,host))) &&
- (!s->charts_pattern || (rc->chart && s->charts_pattern && simple_pattern_matches(s->charts_pattern, rrdcalc_chart_name(rc)))) &&
- (!s->families_pattern || (rc->rrdset && rc->rrdset->family && s->families_pattern && simple_pattern_matches(s->families_pattern, rrdset_family(rc->rrdset))))
+ (!s->alarms_pattern || (rc->name && s->alarms_pattern && simple_pattern_matches_string(s->alarms_pattern, rc->name))) &&
+ (!s->contexts_pattern || (rc->rrdset && rc->rrdset->context && s->contexts_pattern && simple_pattern_matches_string(s->contexts_pattern, rc->rrdset->context))) &&
+ (!s->hosts_pattern || (host && s->hosts_pattern && simple_pattern_matches(s->hosts_pattern, host))) &&
+ (!s->charts_pattern || (rc->chart && s->charts_pattern && simple_pattern_matches_string(s->charts_pattern, rc->chart))) &&
+ (!s->families_pattern || (rc->rrdset && rc->rrdset->family && s->families_pattern && simple_pattern_matches_string(s->families_pattern, rc->rrdset->family)))
) {
debug(D_HEALTH, "Alarm matches command API silence entry %s:%s:%s:%s:%s", s->alarms,s->charts, s->contexts, s->hosts, s->families);
if (unlikely(silencers->stype == STYPE_NONE)) {
@@ -1015,9 +1014,7 @@ void *health_main(void *ptr) {
#endif
worker_is_busy(WORKER_HEALTH_JOB_RRD_LOCK);
- rrd_rdlock();
-
- rrdhost_foreach_read(host) {
+ dfe_start_reentrant(rrdhost_root_index, host) {
if(unlikely(!service_running(SERVICE_HEALTH)))
break;
@@ -1025,11 +1022,8 @@ void *health_main(void *ptr) {
if (unlikely(!host->health.health_enabled))
continue;
- if (unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH))) {
- rrd_unlock();
+ if (unlikely(!rrdhost_flag_check(host, RRDHOST_FLAG_INITIALIZED_HEALTH)))
initialize_health(host);
- rrd_rdlock();
- }
health_execute_delayed_initializations(host);
@@ -1494,9 +1488,8 @@ void *health_main(void *ptr) {
}
break;
}
- } //for each host
-
- rrd_unlock();
+ }
+ dfe_done(host);
// wait for all notifications to finish before allowing health to be cleaned up
ALARM_ENTRY *ae;
@@ -1509,7 +1502,7 @@ void *health_main(void *ptr) {
#ifdef ENABLE_ACLK
if (netdata_cloud_setting && unlikely(aclk_alert_reloaded) && loop > (marked_aclk_reload_loop + 2)) {
- rrdhost_foreach_read(host) {
+ dfe_start_reentrant(rrdhost_root_index, host) {
if(unlikely(!service_running(SERVICE_HEALTH)))
break;
@@ -1518,6 +1511,7 @@ void *health_main(void *ptr) {
sql_queue_removed_alerts_to_aclk(host);
}
+ dfe_done(host);
aclk_alert_reloaded = 0;
marked_aclk_reload_loop = 0;
}