summaryrefslogtreecommitdiffstats
path: root/health/health.h
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2024-01-23 20:20:41 +0200
committerGitHub <noreply@github.com>2024-01-23 20:20:41 +0200
commitf466b8aef52c1ea394651f5fe6b4586a5e39e5af (patch)
tree96130302033cc5df6b2a5683931138c72e4ab88e /health/health.h
parent33412db1f50d833e56889ea4762725dfde5d6d8f (diff)
DYNCFG: dynamically configured alerts (#16779)
* cleanup alerts * fix references * fix references * fix references * load alerts once and apply them to each node * simplify health_create_alarm_entry() * Compile without warnings with compiler flags: -Wall -Wextra -Wformat=2 -Wshadow -Wno-format-nonliteral -Winit-self * code re-organization and cleanup * generate patterns when applying prototypes; give unique dyncfg names to all alerts * eval expressions keep the source and the parsed_as as STRING pointers * renamed host to node in dyncfg ids * renamed host to node in dyncfg ids * add all cloud roles to the list of parsed X-Netdata-Role header and also default to member access level * working functionality * code re-organization: moved health event-loop to a new file, moved health globals to health.c * rrdcalctemplate is removed; alert_cfg is removed; foreach dimension is removed; RRDCALCs are now instantiated only when they are linked to RRDSETs * dyncfg alert prototypes initialization for alerts * health dyncfg split to separate file * cleanup not-needed code * normalize matches between parsing and json * also detect !* for disabled alerts * dyncfg capability disabled * Store alert config part1 * Add rrdlabels_common_count * wip health variables lookup without indexes * Improve rrdlabels_common_count by reusing rrdlabels_find_label_with_key_unsafe with an additional parameter * working variables with runtime lookup * working variables with runtime lookup * delete rrddimvar and rrdfamily index * remove rrdsetvar; now all variables are in RRDVARs inside hosts and charts * added /api/v1/variable that resolves a variable the same way alerts do * remove rrdcalc from eval * remove debug code * remove duplicate assignment * Fix memory leak * all alert variables are now handled by alert_variable_lookup() and EVAL is now independent of alerts * hide all internal structures of EVAL * Enable -Wformat flag Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> * Adjust binding for calculation, warning, critical * Remove 
unused macro * Update config hash id * use the right info and summary in alerts log * use synchronous queries for alerts * Handle cases when config_hash_id is missing from health_log * remove deadlock from health worker * parsing to json payload for health alert prototypes * cleaner parsing and avoiding memory leaks in case of duplicate members in json * fix left-over rename of function * Keep original lookup field to send to the cloud Cleanup / rename function to store config Remove unused DEFINEs, functions * Use ac->lookup * link jobs to the host when the template is registered; do not accept running a function without a host * full dyncfg support for health alerts, except action TEST * working dyncfg additions, updates, removals * fixed missing source, wrong status updates * add alerts by type, component, classification, recipient and module at the /api/v2/alerts endpoint * fix dyncfg unittest * rename functions * generalize the json-c parser macros and move them to libnetdata * report progress when enabling and disabling dyncfg templates * moved rrdcalc and rrdvar to health * update alarms * added schema for alerts; separated alert_action_options from rrdr_options; restructured the json payload for alerts * enable parsed json alerts; allow sending back accepted but disabled * added format_version for alerts payload; enables/disables status now is also inherited by the status of the rules; fixed variable names in json output * remove the RRDHOST pointer from DYNCFG * Fix command field submitted to the cloud * do not send updates to creation requests, for DYNCFG jobs --------- Signed-off-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Co-authored-by: Tasos Katsoulas <tasos@netdata.cloud> Co-authored-by: ilyam8 <ilya@netdata.cloud>
Diffstat (limited to 'health/health.h')
-rw-r--r--health/health.h41
1 files changed, 14 insertions, 27 deletions
diff --git a/health/health.h b/health/health.h
index a107500b3b..882309cae1 100644
--- a/health/health.h
+++ b/health/health.h
@@ -4,8 +4,7 @@
#define NETDATA_HEALTH_H 1
#include "daemon/common.h"
-
-extern unsigned int default_health_enabled;
+#include "rrdcalc.h"
typedef enum __attribute__((packed)) {
HEALTH_ENTRY_FLAG_PROCESSED = 0x00000001, // notifications engine has processed this
@@ -41,20 +40,17 @@ void health_entry_flags_to_json_array(BUFFER *wb, const char *key, HEALTH_ENTRY_
#define HEALTH_SILENCERS_MAX_FILE_LEN 10000
-extern char *silencers_filename;
-extern SIMPLE_PATTERN *conf_enabled_alarms;
-extern DICTIONARY *health_rrdvars;
-
-void health_init(void);
+void health_plugin_init(void);
+void health_plugin_destroy(void);
-void health_reload(void);
+void health_plugin_reload(void);
void health_aggregate_alarms(RRDHOST *host, BUFFER *wb, BUFFER* context, RRDCALC_STATUS status);
void health_alarms2json(RRDHOST *host, BUFFER *wb, int all);
void health_alert2json_conf(RRDHOST *host, BUFFER *wb, CONTEXTS_V2_OPTIONS all);
void health_alarms_values2json(RRDHOST *host, BUFFER *wb, int all);
-void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf);
+void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *wb);
void health_api_v1_chart_custom_variables2json(RRDSET *st, BUFFER *buf);
int health_alarm_log_open(RRDHOST *host);
@@ -63,34 +59,18 @@ void health_alarm_log_load(RRDHOST *host);
ALARM_ENTRY* health_create_alarm_entry(
RRDHOST *host,
- uint32_t alarm_id,
- uint32_t alarm_event_id,
- const uuid_t config_hash_id,
+ RRDCALC *rc,
time_t when,
- STRING *name,
- STRING *chart,
- STRING *chart_context,
- STRING *chart_id,
- STRING *classification,
- STRING *component,
- STRING *type,
- STRING *exec,
- STRING *recipient,
time_t duration,
NETDATA_DOUBLE old_value,
NETDATA_DOUBLE new_value,
RRDCALC_STATUS old_status,
RRDCALC_STATUS new_status,
- STRING *source,
- STRING *units,
- STRING *summary,
- STRING *info,
int delay,
HEALTH_ENTRY_FLAGS flags);
void health_alarm_log_add_entry(RRDHOST *host, ALARM_ENTRY *ae);
-void health_readdir(RRDHOST *host, const char *user_path, const char *stock_path, const char *subpath);
char *health_user_config_dir(void);
char *health_stock_config_dir(void);
void health_alarm_log_free(RRDHOST *host);
@@ -100,7 +80,6 @@ void health_alarm_log_free_one_nochecks_nounlink(ALARM_ENTRY *ae);
void *health_cmdapi_thread(void *ptr);
char *health_edit_command_from_source(const char *source);
-void sql_refresh_hashes(void);
void health_string2json(BUFFER *wb, const char *prefix, const char *label, const char *value, const char *suffix);
@@ -108,4 +87,12 @@ void health_log_alert_transition_with_trace(RRDHOST *host, ALARM_ENTRY *ae, int
#define health_log_alert(host, ae) health_log_alert_transition_with_trace(host, ae, __LINE__, __FILE__, __FUNCTION__)
bool health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(RRDCALC *rc, usec_t *global_id, uuid_t *transitions_id);
+int alert_variable_lookup_trace(RRDHOST *host, RRDSET *st, const char *variable, BUFFER *wb);
+
+#include "health_prototypes.h"
+#include "health_silencers.h"
+
+typedef void (*prototype_metadata_cb_t)(void *data, STRING *type, STRING *component, STRING *classification, STRING *recipient);
+void health_prototype_metadata_foreach(void *data, prototype_metadata_cb_t cb);
+
#endif //NETDATA_HEALTH_H