summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorthiagoftsm <thiagoftsm@gmail.com>2023-10-02 14:14:59 -0300
committerGitHub <noreply@github.com>2023-10-02 20:14:59 +0300
commita16bfec2e7afef4cecd2e24cf95af5f6e5e60244 (patch)
tree76d72f4b2818fbf616149f9531dac612f557b366
parent00f79017f9d2c45289eaa6939b418d5a9084e906 (diff)
Severity level for logs (#14727)
* log_level: Add new data type and variable used to control log level * log_level: Add helper and constants to make setting the log level simpler for users. I initially thought to create a log_select_log_level as void, but I decided it would be worse to have the variable set inside a function instead of being set outside, as the other calls inside log_init do. * log_level: Modify option name to match https://github.com/netdata/netdata/issues/14388#issuecomment-1412188747 * log_level: Modify function info_int to store data according to the user definition for the new variable * log_level: Modify function error_int to match new variable configuration. * documentation_adjust_code: After clarifying the goals, I am removing the level that is not necessary. * documentation_adjust_code: Pass severity-level as argument for plugins. I had to modify the argument in this commit, because plugins like apps have a specific algorithm to parse the arguments. * documentation_adjust_code: As mentioned in the previous commit, to avoid modifying all plugins, I am renaming the configuration option.
* documentation_adjust_code: Add new helper to modify log level from collectors and also modify the first collector (apps.plugin) * documentation_adjust_code: Modify cgroup-network to use severity-level * documentation_adjust_code: Add missing documentation for apps plugin * documentation_adjust_code: Modify cups.plugin to use severity-level * documentation_adjust_code: Modify ebpf.plugin to use severity-level * documentation_adjust_code: Modify freeipmi.plugin to use severity-level * documentation_adjust_code: Modify nfacct.plugin to use severity-level * documentation_adjust_code: Modify perf.plugin to use severity-level * documentation_adjust_code: Modify slab.plugin to use severity-level * documentation_adjust_code: Fix python.plugin * log_level: Revert condition added for info_int, because in any situation we will have INFO data * log_level: Rename function to match new circumstances and add an argument instead of creating another function that would have the same algorithm * log_level: Without this commit the eBPF.plugin will not work properly with the new variables; on the other hand, the proper solution needs more than 14 lines of code. So I am pushing this commit to achieve the goal and simplify the work of my reviewers. * log_level: Add additional information for our documentation * log_level: Bring initial changes for Plugin classes * log_level: Rename log_level to use_severity_level to simplify code reading * log_level: Rename functions for a more meaningful name and also add a new function to pass the same value for all collectors * log_level: Modify all collectors to receive a string as argument instead of an integer; this will simplify things for developers that want to work with a dictionary, and we will also have one unique common value for core/collectors.
* log_level: Modify cgroup to inform cgroup_network what the current severity_level is * log_level: Modify class JobsConfigBuilder to use external severity level * log_level: Modify PythonDLogger class and add arguments for callers * log_level: Address flake8 * log_level: Modify daemon/config/README.md adding information about new log file and also new option * log_level: Accept suggestion given in https://github.com/netdata/netdata/pull/14727#pullrequestreview-1361221097 * log_level: Add information about tags with a link to configuration doc * ebpf_functions: Fix rebase. * rename_error_fcnt: Fix previous rebase and rebase again * log_level: Modify charts. * log_level: Modify tc-qos-helper to dispatch messages according to severity-level * log_level: Fix charts.d and typo * log_level: Remove unnecessary cast * log_level: Fix rebase * Fix rebase * log_level: Fix shellcheck * log_level: Remove severity level for external plugins * log_level: Remove severity level for external plugins (2) * log_level: Remove severity level for external plugins (cgroup) * log_level: Restore cgroup line * log_level: Only set severity when developers do not need message * log_level: Add environment variable to communicate with external plugin and modify default level. * log_level: Modify functions to use severity level * Apply suggestions from code review fix sev level/global sev level comparison * rename var and add err short str * simplify logic in log level cmp * update docs * fix log_severity_level_to_severity_string * Update daemon/README.md --------- Co-authored-by: Ilya Mashchenko <ilya@netdata.cloud>
-rw-r--r--collectors/ebpf.plugin/ebpf.c3
-rwxr-xr-xcollectors/tc.plugin/tc-qos-helper.sh.in2
-rw-r--r--daemon/README.md2
-rw-r--r--daemon/config/README.md1
-rw-r--r--daemon/main.c4
-rw-r--r--libnetdata/log/README.md1
-rw-r--r--libnetdata/log/log.c38
-rw-r--r--libnetdata/log/log.h16
8 files changed, 63 insertions, 4 deletions
diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c
index 388aaf44b1..1ba5db61c1 100644
--- a/collectors/ebpf.plugin/ebpf.c
+++ b/collectors/ebpf.plugin/ebpf.c
@@ -3399,8 +3399,7 @@ unittest:
}
if (disable_cgroups) {
- if (disable_cgroups)
- ebpf_disable_cgroups();
+ ebpf_disable_cgroups();
}
if (select_threads) {
diff --git a/collectors/tc.plugin/tc-qos-helper.sh.in b/collectors/tc.plugin/tc-qos-helper.sh.in
index 97d4d016db..0fab69eeff 100755
--- a/collectors/tc.plugin/tc-qos-helper.sh.in
+++ b/collectors/tc.plugin/tc-qos-helper.sh.in
@@ -291,7 +291,7 @@ while true; do
echo "WORKTIME ${LOOPSLEEPMS_LASTWORK}" || exit
- loopsleepms ${update_every}
+ loopsleepms "${update_every}"
[ ${gc} -gt ${exit_after} ] && exit 0
done
diff --git a/daemon/README.md b/daemon/README.md
index e37c17cf65..0707a406c1 100644
--- a/daemon/README.md
+++ b/daemon/README.md
@@ -143,6 +143,8 @@ For most Netdata programs (including standard external plugins shipped by netdat
| `ERROR` | Something that might disable a part of netdata.<br/>The log line includes `errno` (if it is not zero). |
| `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. |
+The `FATAL` and `ERROR` messages will always appear in the logs, and `INFO` can be filtered using the [severity level](https://github.com/netdata/netdata/tree/master/daemon/config#logs-section-options) option.
+
So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the
program continues to run.
diff --git a/daemon/config/README.md b/daemon/config/README.md
index 53ba42f5bd..11ba2a1bc7 100644
--- a/daemon/config/README.md
+++ b/daemon/config/README.md
@@ -139,6 +139,7 @@ Please note that your data history will be lost if you have modified `history` p
| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. |
| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. |
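+| severity level | `info` | Controls which log messages are written. With `info`, both `INFO` and `ERROR` messages are logged; with `error` (or its short form `err`), `INFO` messages are suppressed and only errors are logged. Any other value is treated as `info`. |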
### [environment variables] section options
diff --git a/daemon/main.c b/daemon/main.c
index 9460223a4c..59c02bb498 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -874,6 +874,10 @@ static void log_init(void) {
setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
+
+ char *selected_level = config_get(CONFIG_SECTION_LOGS, "severity level", NETDATA_LOG_LEVEL_INFO_STR);
+ global_log_severity_level = log_severity_string_to_severity_level(selected_level);
+ setenv("NETDATA_LOG_SEVERITY_LEVEL", selected_level , 1);
}
char *initialize_lock_directory_path(char *prefix)
diff --git a/libnetdata/log/README.md b/libnetdata/log/README.md
index 3684abd688..f811bb4b3a 100644
--- a/libnetdata/log/README.md
+++ b/libnetdata/log/README.md
@@ -12,4 +12,3 @@ learn_rel_path: "Developers/libnetdata"
The netdata log library supports debug, info, error and fatal error logging.
By default we have an access log, an error log and a collectors log.
-
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c
index 8289673118..e7fe81160a 100644
--- a/libnetdata/log/log.c
+++ b/libnetdata/log/log.c
@@ -34,6 +34,8 @@ const char *facility_log = NULL;
const char *stdhealth_filename = NULL;
const char *stdcollector_filename = NULL;
+netdata_log_level_t global_log_severity_level = NETDATA_LOG_LEVEL_INFO;
+
#ifdef ENABLE_ACLK
const char *aclklog_filename = NULL;
int aclklog_fd = -1;
@@ -780,6 +782,11 @@ void debug_int( const char *file, const char *function, const unsigned long line
void info_int( int is_collector, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... )
{
+#if !defined(NETDATA_INTERNAL_CHECKS) && !defined(NETDATA_DEV_MODE)
+ if (NETDATA_LOG_LEVEL_INFO > global_log_severity_level)
+ return;
+#endif
+
va_list args;
FILE *fp = (is_collector || !stderror) ? stderr : stderror;
@@ -908,6 +915,11 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma
}
void error_int(int is_collector, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
+#if !defined(NETDATA_INTERNAL_CHECKS) && !defined(NETDATA_DEV_MODE)
+ if (NETDATA_LOG_LEVEL_ERROR > global_log_severity_level)
+ return;
+#endif
+
// save a copy of errno - just in case this function generates a new error
int __errno = errno;
FILE *fp = (is_collector || !stderror) ? stderr : stderror;
@@ -1125,3 +1137,29 @@ void log_aclk_message_bin( const char *data, const size_t data_len, int tx, cons
}
}
#endif
+/*
+ * Replace the process-wide log severity threshold.
+ *
+ * The value is stored as-is in global_log_severity_level; no range check is
+ * performed here. In builds without NETDATA_INTERNAL_CHECKS and
+ * NETDATA_DEV_MODE, info_int() and error_int() compare their own level
+ * against this variable and return early when it is exceeded.
+ */
+void log_set_global_severity_level(netdata_log_level_t value)
+{
+ global_log_severity_level = value;
+}
+
+// Convert a severity keyword (such as the "severity level" configuration value)
+// into a netdata_log_level_t.
+//
+// level: NUL-terminated keyword. NULL is tolerated and handled like an
+//        unrecognized keyword.
+//
+// Returns NETDATA_LOG_LEVEL_ERROR for NETDATA_LOG_LEVEL_ERROR_STR or its short
+// form NETDATA_LOG_LEVEL_ERROR_SHORT_STR, and NETDATA_LOG_LEVEL_INFO for
+// NETDATA_LOG_LEVEL_INFO_STR and for every other input. The comparison is
+// case-sensitive.
+netdata_log_level_t log_severity_string_to_severity_level(char *level)
+{
+    // strcmp() on a NULL pointer is undefined behavior; use the default level instead.
+    if (!level)
+        return NETDATA_LOG_LEVEL_INFO;
+
+    if (!strcmp(level, NETDATA_LOG_LEVEL_ERROR_STR) || !strcmp(level, NETDATA_LOG_LEVEL_ERROR_SHORT_STR))
+        return NETDATA_LOG_LEVEL_ERROR;
+
+    // The info keyword and any unrecognized keyword both select the default level,
+    // so no separate comparison against NETDATA_LOG_LEVEL_INFO_STR is needed.
+    return NETDATA_LOG_LEVEL_INFO;
+}
+
+// Translate a severity level back into its configuration keyword.
+//
+// Returns NETDATA_LOG_LEVEL_ERROR_STR for NETDATA_LOG_LEVEL_ERROR and
+// NETDATA_LOG_LEVEL_INFO_STR for every other value, including values outside
+// the enum. The result points to a string literal: callers must not modify or
+// free it.
+char *log_severity_level_to_severity_string(netdata_log_level_t level)
+{
+    if (level == NETDATA_LOG_LEVEL_ERROR)
+        return NETDATA_LOG_LEVEL_ERROR_STR;
+
+    return NETDATA_LOG_LEVEL_INFO_STR;
+}
diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h
index 9ced07a9af..38aa2cbf54 100644
--- a/libnetdata/log/log.h
+++ b/libnetdata/log/log.h
@@ -105,6 +105,22 @@ typedef struct error_with_limit {
usec_t sleep_ut;
} ERROR_LIMIT;
+typedef enum netdata_log_level { // log severity levels; order matters because info_int()/error_int() compare a message's level against global_log_severity_level with '>'
+ NETDATA_LOG_LEVEL_ERROR, // lowest value: as the threshold, info messages are filtered out
+ NETDATA_LOG_LEVEL_INFO, // default threshold: info and error messages both pass
+
+ NETDATA_LOG_LEVEL_END // NOTE(review): looks like an end-of-enum sentinel, not a selectable level - confirm
+} netdata_log_level_t;
+
+#define NETDATA_LOG_LEVEL_INFO_STR "info" // keyword for NETDATA_LOG_LEVEL_INFO
+#define NETDATA_LOG_LEVEL_ERROR_STR "error" // keyword for NETDATA_LOG_LEVEL_ERROR
+#define NETDATA_LOG_LEVEL_ERROR_SHORT_STR "err" // short keyword, also parsed as NETDATA_LOG_LEVEL_ERROR
+
+extern netdata_log_level_t global_log_severity_level; // current severity threshold; defined in log.c, initialized to NETDATA_LOG_LEVEL_INFO
+netdata_log_level_t log_severity_string_to_severity_level(char *level); // keyword -> level; unrecognized keywords yield NETDATA_LOG_LEVEL_INFO
+char *log_severity_level_to_severity_string(netdata_log_level_t level); // level -> keyword; returns a string literal, do not modify or free
+void log_set_global_severity_level(netdata_log_level_t value); // overwrite global_log_severity_level with value
+
#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }
#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }