summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2022-11-01 20:31:57 +0200
committerGitHub <noreply@github.com>2022-11-01 20:31:57 +0200
commitcef305c5e111cac7276a3c36e59dc84911e6d509 (patch)
treeec01e0eca3f8c3524fa51724b5184f72d62e6ec1
parente3cbf35028932a2ca5ab3e806a21b6ee8dc14a8b (diff)
error_limit() function to limit number of error lines per instance (#13924)
* error_limit() function to limit number of error lines per instance * count should be more than 1 * protect settings ERROR_LIMIT members
-rw-r--r--libnetdata/log/log.c64
-rw-r--r--libnetdata/log/log.h12
-rw-r--r--libnetdata/socket/socket.c5
3 files changed, 78 insertions, 3 deletions
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c
index b8c309989e..e01f929008 100644
--- a/libnetdata/log/log.c
+++ b/libnetdata/log/log.c
@@ -818,7 +818,69 @@ static const char *strerror_result_string(const char *a, const char *b) { (void)
#error "cannot detect the format of function strerror_r()"
#endif
-void error_int( const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
+void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
+ if(erl->sleep_ut)
+ sleep_usec(erl->sleep_ut);
+
+ // save a copy of errno - just in case this function generates a new error
+ int __errno = errno;
+
+ va_list args;
+
+ log_lock();
+
+ erl->count++;
+ time_t now = now_boottime_sec();
+ if(now - erl->last_logged < erl->log_every) {
+ log_unlock();
+ return;
+ }
+
+ // prevent logging too much
+ if (error_log_limit(0)) {
+ log_unlock();
+ return;
+ }
+
+ if(error_log_syslog) {
+ va_start( args, fmt );
+ vsyslog(LOG_ERR, fmt, args );
+ va_end( args );
+ }
+
+ char date[LOG_DATE_LENGTH];
+ log_date(date, LOG_DATE_LENGTH, now_realtime_sec());
+
+ va_start( args, fmt );
+#ifdef NETDATA_INTERNAL_CHECKS
+ fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function);
+#else
+ fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag());
+#endif
+ vfprintf( stderr, fmt, args );
+ va_end( args );
+
+ if(erl->count > 1)
+ fprintf(stderr, " (repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0));
+
+ if(erl->sleep_ut)
+ fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut);
+
+ if(__errno) {
+ char buf[1024];
+ fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf));
+ errno = 0;
+ }
+ else
+ fputc('\n', stderr);
+
+ erl->last_logged = now;
+ erl->count = 0;
+
+ log_unlock();
+}
+
+void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
// save a copy of errno - just in case this function generates a new error
int __errno = errno;
diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h
index ac8fa7a53d..11dab4c1df 100644
--- a/libnetdata/log/log.h
+++ b/libnetdata/log/log.h
@@ -94,6 +94,16 @@ static inline void debug_dummy(void) {}
void error_log_limit_reset(void);
void error_log_limit_unlimited(void);
+typedef struct error_with_limit {
+ time_t log_every;
+ size_t count;
+ time_t last_logged;
+ usec_t sleep_ut;
+} ERROR_LIMIT;
+
+#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }
+#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) }
+
#ifdef NETDATA_INTERNAL_CHECKS
#define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0)
@@ -107,6 +117,7 @@ void error_log_limit_unlimited(void);
#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args)
#define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args)
#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args)
+#define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args)
#define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args)
#define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr))
@@ -114,6 +125,7 @@ void send_statistics(const char *action, const char *action_result, const char *
void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5);
void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6);
+void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);;
void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5);
void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2);
void log_health( const char *fmt, ... ) PRINTFLIKE(1, 2);
diff --git a/libnetdata/socket/socket.c b/libnetdata/socket/socket.c
index 2f9d476662..4344d982a1 100644
--- a/libnetdata/socket/socket.c
+++ b/libnetdata/socket/socket.c
@@ -1547,8 +1547,9 @@ static int poll_process_new_tcp_connection(POLLJOB *p, POLLINFO *pi, struct poll
debug(D_POLLFD, "POLLFD: LISTENER: accept4() slot %zu (fd %d) failed.", pi->slot, pf->fd);
if(unlikely(errno == EMFILE)) {
- error("POLLFD: LISTENER: too many open files - sleeping for 1ms - used by this thread %zu, max for this thread %zu", p->used, p->limit);
- usleep(1000); // 1ms
+ error_limit_static_global_var(erl, 10, 1000);
+ error_limit(&erl, "POLLFD: LISTENER: too many open files - used by this thread %zu, max for this thread %zu",
+ p->used, p->limit);
}
else if(unlikely(errno != EWOULDBLOCK && errno != EAGAIN))
error("POLLFD: LISTENER: accept() failed.");