diff options
author | Chris Akritidis <43294513+cakrit@users.noreply.github.com> | 2019-01-15 12:49:28 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-01-15 12:49:28 +0100 |
commit | 08649bec373555144878b4314e87c9a8eb38c82e (patch) | |
tree | 23c9246e606f948944af8bc33f976085fa455848 | |
parent | 67834f184ff3d67f6c8b6005ac68b568d7573118 (diff) |
Port ACLs, Management API and Health commands (#4969)
##### Summary
fixes #2673
fixes #2149
fixes #5017
fixes #3830
fixes #3187
fixes #5154
Implements a command API for health which will accept commands via a socket to selectively suppress health checks.
Allows different ports to accept different request types (streaming, dashboard, api, registry, netdata.conf, badges, management).
Removes support for multi-threaded and single-threaded web servers.
##### Component Name
health, daemon
51 files changed, 2078 insertions, 1105 deletions
diff --git a/.gitignore b/.gitignore index f811fce280..7c730b7305 100644 --- a/.gitignore +++ b/.gitignore @@ -51,7 +51,6 @@ cgroup-network !cgroup-network/ # installation artifacts -installer/.environment.sh packaging/installer/.environment.sh *.tar.* *.run @@ -140,6 +139,7 @@ tests/profile/benchmark-line-parsing tests/profile/benchmark-procfile-parser tests/profile/benchmark-value-pairs tests/profile/statsd-stress +tests/health_mgmtapi/health-cmdapi-test.sh oprofile_data/ vgcore.* callgrind.out.* diff --git a/CMakeLists.txt b/CMakeLists.txt index 1c3d207596..cb1e1ef482 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ find_package(PkgConfig REQUIRED) #set(CMAKE_BUILD_TYPE "Release") # set this to see the compilation commands -#set(CMAKE_VERBOSE_MAKEFILE 1) +# set(CMAKE_VERBOSE_MAKEFILE 1) # ----------------------------------------------------------------------------- @@ -30,8 +30,8 @@ IF("${CMAKE_BUILD_TYPE}" MATCHES "Debug") set(CXX_FORMAT_SIGNEDNESS "-Wformat-signedness") set(CXX_FORMAT_SECURITY "-Werror=format-security") set(CXX_STACK_PROTECTOR "-fstack-protector-all") - - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O1 -ggdb -Wall -Wextra -DNETDATA_INTERNAL_CHECKS=1 -DNETDATA_VERIFY_LOCKS=1 ${CXX_FORMAT_SIGNEDNESS} ${CXX_FORMAT_SECURITY} ${CXX_STACK_PROTECTOR}") + set(CXX_FLAGS_DEBUG "-O0") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O1 -ggdb -Wall -Wextra -DNETDATA_INTERNAL_CHECKS=1 -DNETDATA_VERIFY_LOCKS=1 ${CXX_FORMAT_SIGNEDNESS} ${CXX_FORMAT_SECURITY} ${CXX_STACK_PROTECTOR} ${CXX_FLAGS_DEBUG}") ELSE() message(STATUS "building for: release") cmake_policy(SET CMP0069 "NEW") @@ -221,8 +221,7 @@ set(HEALTH_PLUGIN_FILES health/health.h health/health_config.c health/health_json.c - health/health_log.c - ) + health/health_log.c) set(IDLEJITTER_PLUGIN_FILES collectors/idlejitter.plugin/plugin_idlejitter.c @@ -354,10 +353,6 @@ set(WEB_PLUGIN_FILES web/server/web_client.h web/server/web_server.c web/server/web_server.h - web/server/single/single-threaded.c - 
web/server/single/single-threaded.h - web/server/multi/multi-threaded.c - web/server/multi/multi-threaded.h web/server/static/static-threaded.c web/server/static/static-threaded.h web/server/web_client_cache.c @@ -411,6 +406,7 @@ set(API_PLUGIN_FILES web/api/formatters/charts2json.h web/api/formatters/rrdset2json.c web/api/formatters/rrdset2json.h + web/api/health/health_cmdapi.c ) set(STREAMING_PLUGIN_FILES @@ -479,7 +475,7 @@ add_definitions( -DLIBCONFIG_DIR="/usr/lib/netdata/conf.d" -DLOG_DIR="/var/log/netdata" -DPLUGINS_DIR="/usr/libexec/netdata" - -DWEB_DIR="/usr/share/netdata" + -DWEB_DIR="/usr/share/netdata/web" -DVARLIB_DIR="/var/lib/netdata" ) diff --git a/Makefile.am b/Makefile.am index 994b4260bd..a6998839ae 100644 --- a/Makefile.am +++ b/Makefile.am @@ -337,6 +337,8 @@ API_PLUGIN_FILES = \ web/api/formatters/charts2json.h \ web/api/formatters/rrdset2json.c \ web/api/formatters/rrdset2json.h \ + web/api/health/health_cmdapi.c \ + web/api/health/health_cmdapi.h \ web/api/web_api_v1.c \ web/api/web_api_v1.h \ $(NULL) @@ -374,10 +376,6 @@ WEB_PLUGIN_FILES = \ web/server/web_server.h \ web/server/web_client_cache.c \ web/server/web_client_cache.h \ - web/server/single/single-threaded.c \ - web/server/single/single-threaded.h \ - web/server/multi/multi-threaded.c \ - web/server/multi/multi-threaded.h \ web/server/static/static-threaded.c \ web/server/static/static-threaded.h \ $(NULL) diff --git a/build/subst.inc b/build/subst.inc index 508c0e142b..558d33adf9 100644 --- a/build/subst.inc +++ b/build/subst.inc @@ -5,6 +5,8 @@ -e 's#[@]configdir_POST@#$(configdir)#g' \ -e 's#[@]libconfigdir_POST@#$(libconfigdir)#g' \ -e 's#[@]cachedir_POST@#$(cachedir)#g' \ + -e 's#[@]registrydir_POST@#$(registrydir)#g' \ + -e 's#[@]varlibdir_POST@#$(varlibdir)#g' \ $< > $@.tmp; then \ mv "$@.tmp" "$@"; \ else \ diff --git a/configure.ac b/configure.ac index ff591f963c..86b9782ba9 100644 --- a/configure.ac +++ b/configure.ac @@ -609,10 +609,9 @@ AC_CONFIG_FILES([ 
web/api/queries/ses/Makefile web/api/queries/stddev/Makefile web/api/queries/sum/Makefile + web/api/health/Makefile web/gui/Makefile web/server/Makefile - web/server/single/Makefile - web/server/multi/Makefile web/server/static/Makefile ]) AC_OUTPUT diff --git a/daemon/main.c b/daemon/main.c index b2c4c80bf5..1ec5f6ed5a 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -67,8 +67,6 @@ struct netdata_static_thread static_threads[] = { // common plugins for all systems {"BACKENDS", NULL, NULL, 1, NULL, NULL, backends_main}, - {"WEB_SERVER[multi]", NULL, NULL, 1, NULL, NULL, socket_listen_main_multi_threaded}, - {"WEB_SERVER[single]", NULL, NULL, 0, NULL, NULL, socket_listen_main_single_threaded}, {"WEB_SERVER[static1]", NULL, NULL, 0, NULL, NULL, socket_listen_main_static_threaded}, {"STREAM", NULL, NULL, 0, NULL, NULL, rrdpush_sender_thread}, @@ -81,18 +79,10 @@ struct netdata_static_thread static_threads[] = { void web_server_threading_selection(void) { web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); - int multi_threaded = (web_server_mode == WEB_SERVER_MODE_MULTI_THREADED); - int single_threaded = (web_server_mode == WEB_SERVER_MODE_SINGLE_THREADED); int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); int i; for (i = 0; static_threads[i].name; i++) { - if (static_threads[i].start_routine == socket_listen_main_multi_threaded) - static_threads[i].enabled = multi_threaded; - - if (static_threads[i].start_routine == socket_listen_main_single_threaded) - static_threads[i].enabled = single_threaded; - if (static_threads[i].start_routine == socket_listen_main_static_threaded) static_threads[i].enabled = static_threaded; } @@ -113,6 +103,8 @@ void web_server_config_options(void) { web_allow_registry_from = simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); web_allow_streaming_from = 
simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), NULL, SIMPLE_PATTERN_EXACT); web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.* 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.* 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.* 172.31.*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_mgmt_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"), NULL, SIMPLE_PATTERN_EXACT); + #ifdef NETDATA_WITH_ZLIB web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); @@ -367,13 +359,6 @@ void log_init(void) { } static void backwards_compatible_config() { - // allow existing configurations to work with the current version of netdata - - if(config_exists(CONFIG_SECTION_GLOBAL, "multi threaded web server")) { - int mode = config_get_boolean(CONFIG_SECTION_GLOBAL, "multi threaded web server", 1); - web_server_mode = (mode)?WEB_SERVER_MODE_MULTI_THREADED:WEB_SERVER_MODE_SINGLE_THREADED; - } - // move [global] options to the [web] section config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog", CONFIG_SECTION_WEB, "listen backlog"); @@ -876,7 +861,6 @@ int main(int argc, char **argv) { load_netdata_conf(NULL, 0); } - backwards_compatible_config(); get_netdata_configured_variables(); const char *section = argv[optind]; @@ -1056,7 +1040,6 @@ int main(int argc, char **argv) { rrd_init(netdata_configured_hostname); - // ------------------------------------------------------------------------ // enable log flood protection diff --git a/database/rrdcalc.h b/database/rrdcalc.h index 0c7cd0aa1e..4df4381ae2 100644 --- a/database/rrdcalc.h +++ b/database/rrdcalc.h @@ -25,6 +25,8 @@ #define RRDCALC_FLAG_WARN_ERROR 0x00000010 #define RRDCALC_FLAG_CRIT_ERROR 0x00000020 #define RRDCALC_FLAG_RUNNABLE 0x00000040 +#define RRDCALC_FLAG_DISABLED 0x00000080 
+#define RRDCALC_FLAG_SILENCED 0x00000100 #define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 struct rrdcalc { diff --git a/database/rrdhost.c b/database/rrdhost.c index 43aa2daa29..7234db9a05 100644 --- a/database/rrdhost.c +++ b/database/rrdhost.c @@ -103,7 +103,6 @@ static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_ host->hash_machine_guid = simple_hash(host->machine_guid); } - // ---------------------------------------------------------------------------- // RRDHOST - add a host @@ -149,6 +148,7 @@ RRDHOST *rrdhost_create(const char *hostname, rrdhost_init_hostname(host, hostname); rrdhost_init_machine_guid(host, guid); + rrdhost_init_os(host, os); rrdhost_init_timezone(host, timezone); rrdhost_init_tags(host, tags); @@ -442,7 +442,7 @@ restart_after_removal: void rrd_init(char *hostname) { rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time); gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above); - if(gap_when_lost_iterations_above < 1) + if (gap_when_lost_iterations_above < 1) gap_when_lost_iterations_above = 1; health_init(); @@ -471,6 +471,7 @@ void rrd_init(char *hostname) { , 1 ); rrd_unlock(); + web_client_api_v1_management_init(); } // ---------------------------------------------------------------------------- diff --git a/database/rrdsetvar.c b/database/rrdsetvar.c index 1bb883f0b4..9da4193049 100644 --- a/database/rrdsetvar.c +++ b/database/rrdsetvar.c @@ -150,12 +150,12 @@ RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) if(hash == rs->hash && strcmp(n, rs->variable) == 0) { rrdset_unlock(st); if(rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR) { - free(n); + freez(n); return rs; } else { error("RRDSETVAR: custom variable '%s' on chart '%s' of host '%s', conflicts with an internal chart variable", n, st->id, 
host->hostname); - free(n); + freez(n); return NULL; } } diff --git a/database/rrdvar.c b/database/rrdvar.c index 951a38caca..600bd34c4a 100644 --- a/database/rrdvar.c +++ b/database/rrdvar.c @@ -137,7 +137,7 @@ static RRDVAR *rrdvar_custom_variable_create(const char *scope, avl_tree_lock *t RRDVAR *rv = rrdvar_create_and_index(scope, tree_lock, name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_CUSTOM_HOST_VAR|RRDVAR_OPTION_ALLOCATED, v); if(unlikely(!rv)) { - free(v); + freez(v); debug(D_VARIABLES, "Requested variable '%s' already exists - possibly 2 plugins are updating it at the same time.", name); char *variable = strdupz(name); diff --git a/docs/generator/buildyaml.sh b/docs/generator/buildyaml.sh index 10811b17e4..3a96643ac8 100755 --- a/docs/generator/buildyaml.sh +++ b/docs/generator/buildyaml.sh @@ -94,8 +94,6 @@ markdown_extensions: - pymdownx.caret - pymdownx.critic - pymdownx.details - - pymdownx.emoji: - emoji_generator: !!python/name:pymdownx.emoji.to_svg - pymdownx.inlinehilite - pymdownx.magiclink - pymdownx.mark @@ -234,5 +232,5 @@ echo -ne "- Hacking netdata: navpart 2 makeself "" "" 4 navpart 2 libnetdata "" "libnetdata" 4 navpart 2 contrib -navpart 2 tests +navpart 2 tests "" "" 2 navpart 2 diagrams/data_structures diff --git a/docs/generator/requirements.txt b/docs/generator/requirements.txt index 5021831087..ac01be7aef 100644 --- a/docs/generator/requirements.txt +++ b/docs/generator/requirements.txt @@ -1,3 +1,2 @@ mkdocs>=1.0.1 mkdocs-material - diff --git a/health/README.md b/health/README.md index fae34fc71a..54f6a3e1f7 100644 --- a/health/README.md +++ b/health/README.md @@ -159,7 +159,7 @@ The simple pattern syntax and operation is explained in [simple patterns](../lib #### Alarm line `lookup` -This lines makes a database lookup to find a value. This result of this lookup is available as `$this`. +This line makes a database lookup to find a value. This result of this lookup is available as `$this`. 
The format is: @@ -465,7 +465,7 @@ Although the `alarm_variables` link shows you variables for a particular chart, - `$status`, which is resolved to the current status of the alarm (the current = the last status, i.e. before the current database lookup and the evaluation of the `calc` line). This values can be compared with `$REMOVED`, `$UNINITIALIZED`, `$UNDEFINED`, `$CLEAR`, - `$WARNING`, `$CRITICAL`. These values are incremental, ie. `$status > $CLEAL` works as + `$WARNING`, `$CRITICAL`. These values are incremental, ie. `$status > $CLEAR` works as expected. - `$now`, which is resolved to current unix timestamp. @@ -653,5 +653,11 @@ You can find the context of charts by looking up the chart in either You can find how netdata interpreted the expressions by examining the alarm at `http://your.netdata:19999/api/v1/alarms?all`. For each expression, netdata will return the expression as given in its config file, and the same expression with additional parentheses added to indicate the evaluation flow of the expression. +## Disabling health checks or silencing notifications at runtime + +The health checks can be controlled at runtime via the [health management api](../web/api/health/#health-management-api). 
[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fhealth%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() + + + diff --git a/health/health.c b/health/health.c index 5acbfdd2e8..f92a1ba6b0 100644 --- a/health/health.c +++ b/health/health.c @@ -2,6 +2,12 @@ #include "health.h" +struct health_cmdapi_thread_status { + int status; + ; + struct rusage rusage; +}; + unsigned int default_health_enabled = 1; // ---------------------------------------------------------------------------- @@ -147,6 +153,12 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) { } } + // Check if alarm notifications are silenced + if (ae->flags & HEALTH_ENTRY_FLAG_SILENCED) { + info("Health not sending notification for alarm '%s.%s' status %s (command API has disabled notifications)", ae->chart, ae->name, rrdcalc_status2string(ae->new_status)); + goto done; + } + static char command_to_run[ALARM_EXEC_COMMAND_LENGTH + 1]; pid_t command_pid; @@ -381,6 +393,67 @@ static void health_main_cleanup(void *ptr) { static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } +SILENCE_TYPE check_silenced(RRDCALC *rc, char* host, SILENCERS *silencers) { + SILENCER *s; + debug(D_HEALTH, "Checking if alarm was silenced via the command API. 
Alarm info name:%s context:%s chart:%s host:%s family:%s", + rc->name, (rc->rrdset)?rc->rrdset->context:"", rc->chart, host, (rc->rrdset)?rc->rrdset->family:""); + + for (s = silencers->silencers; s!=NULL; s=s->next){ + if ( + (!s->alarms_pattern || (rc->name && s->alarms_pattern && simple_pattern_matches(s->alarms_pattern,rc->name))) && + (!s->contexts_pattern || (rc->rrdset && rc->rrdset->context && s->contexts_pattern && simple_pattern_matches(s->contexts_pattern,rc->rrdset->context))) && + (!s->hosts_pattern || (host && s->hosts_pattern && simple_pattern_matches(s->hosts_pattern,host))) && + (!s->charts_pattern || (rc->chart && s->charts_pattern && simple_pattern_matches(s->charts_pattern,rc->chart))) && + (!s->families_pattern || (rc->rrdset && rc->rrdset->family && s->families_pattern && simple_pattern_matches(s->families_pattern,rc->rrdset->family))) + ) { + debug(D_HEALTH, "Alarm matches command API silence entry %s:%s:%s:%s:%s", s->alarms,s->charts, s->contexts, s->hosts, s->families); + if (unlikely(silencers->stype == STYPE_NONE)) { + debug(D_HEALTH, "Alarm %s matched a silence entry, but no SILENCE or DISABLE command was issued via the command API. 
The match has no effect.", rc->name); + } else { + debug(D_HEALTH, "Alarm %s via the command API - name:%s context:%s chart:%s host:%s family:%s" + , (silencers->stype==STYPE_DISABLE_ALARMS)?"Disabled":"Silenced" + , rc->name + , (rc->rrdset)?rc->rrdset->context:"" + , rc->chart + , host + , (rc->rrdset)?rc->rrdset->family:"" + ); + } + return silencers->stype; + } + } + return STYPE_NONE; +} + +int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) { + uint32_t rrdcalc_flags_old = rc->rrdcalc_flags; + // Clear the flags + rc->rrdcalc_flags &= ~(RRDCALC_FLAG_DISABLED | RRDCALC_FLAG_SILENCED); + if (unlikely(silencers->all_alarms)) { + if (silencers->stype == STYPE_DISABLE_ALARMS) rc->rrdcalc_flags |= RRDCALC_FLAG_DISABLED; + else if (silencers->stype == STYPE_SILENCE_NOTIFICATIONS) rc->rrdcalc_flags |= RRDCALC_FLAG_SILENCED; + } else { + SILENCE_TYPE st = check_silenced(rc, host->hostname, silencers); + if (st == STYPE_DISABLE_ALARMS) rc->rrdcalc_flags |= RRDCALC_FLAG_DISABLED; + else if (st == STYPE_SILENCE_NOTIFICATIONS) rc->rrdcalc_flags |= RRDCALC_FLAG_SILENCED; + } + + if (rrdcalc_flags_old != rc->rrdcalc_flags) { + info("Alarm silencing changed for host '%s' alarm '%s': Disabled %s->%s Silenced %s->%s", + host->hostname, + rc->name, + (rrdcalc_flags_old & RRDCALC_FLAG_DISABLED)?"true":"false", + (rc->rrdcalc_flags & RRDCALC_FLAG_DISABLED)?"true":"false", + (rrdcalc_flags_old & RRDCALC_FLAG_SILENCED)?"true":"false", + (rc->rrdcalc_flags & RRDCALC_FLAG_SILENCED)?"true":"false" + ); + } + if (rc->rrdcalc_flags & RRDCALC_FLAG_DISABLED) + return 1; + else + return 0; +} + void *health_main(void *ptr) { netdata_thread_cleanup_push(health_main_cleanup, ptr); @@ -391,371 +464,338 @@ void *health_main(void *ptr) { time_t hibernation_delay = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60); unsigned int loop = 0; - while(!netdata_exit) { - loop++; - debug(D_HEALTH, "Health monitoring iteration no %u started", loop); - - 
int runnable = 0, apply_hibernation_delay = 0; - time_t next_run = now + min_run_every; - RRDCALC *rc; - - if(unlikely(check_if_resumed_from_suspention())) { - apply_hibernation_delay = 1; - - info("Postponing alarm checks for %ld seconds, because it seems that the system was just resumed from suspension." - , hibernation_delay - ); - } - rrd_rdlock(); + silencers = mallocz(sizeof(SILENCERS)); + silencers->all_alarms=0; + silencers->stype=STYPE_NONE; + silencers->silencers=NULL; - RRDHOST *host; - rrdhost_foreach_read(host) { - if(unlikely(!host->health_enabled)) - continue; - - if(unlikely(apply_hibernation_delay)) { - - info("Postponing health checks for %ld seconds, on host '%s'." - , hibernation_delay - , host->hostname - ); - - host->health_delay_up_to = now + hibernation_delay; - } + while(!netdata_exit) { + loop++; + debug(D_HEALTH, "Health monitoring iteration no %u started", loop); - if(unlikely(host->health_delay_up_to)) { - if(unlikely(now < host->health_delay_up_to)) - continue; + int runnable = 0, apply_hibernation_delay = 0; + time_t next_run = now + min_run_every; + RRDCALC *rc; - info("Resuming health checks on host '%s'.", host->hostname); - host->health_delay_up_to = 0; - } + if (unlikely(che |