diff options
author | Costa Tsaousis <costa@netdata.cloud> | 2022-11-01 20:31:57 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-01 20:31:57 +0200 |
commit | cef305c5e111cac7276a3c36e59dc84911e6d509 (patch) | |
tree | ec01e0eca3f8c3524fa51724b5184f72d62e6ec1 | |
parent | e3cbf35028932a2ca5ab3e806a21b6ee8dc14a8b (diff) |
error_limit() function to limit number of error lines per instance (#13924)
* error_limit() function to limit number of error lines per instance
* count should be more than 1
* protect settings ERROR_LIMIT members
-rw-r--r-- | libnetdata/log/log.c | 64 | ||||
-rw-r--r-- | libnetdata/log/log.h | 12 | ||||
-rw-r--r-- | libnetdata/socket/socket.c | 5 |
3 files changed, 78 insertions, 3 deletions
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c index b8c309989e..e01f929008 100644 --- a/libnetdata/log/log.c +++ b/libnetdata/log/log.c @@ -818,7 +818,69 @@ static const char *strerror_result_string(const char *a, const char *b) { (void) #error "cannot detect the format of function strerror_r()" #endif -void error_int( const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + if(erl->sleep_ut) + sleep_usec(erl->sleep_ut); + + // save a copy of errno - just in case this function generates a new error + int __errno = errno; + + va_list args; + + log_lock(); + + erl->count++; + time_t now = now_boottime_sec(); + if(now - erl->last_logged < erl->log_every) { + log_unlock(); + return; + } + + // prevent logging too much + if (error_log_limit(0)) { + log_unlock(); + return; + } + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_ERR, fmt, args ); + va_end( args ); + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); +#else + fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + if(erl->count > 1) + fprintf(stderr, " (repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + + if(erl->sleep_ut) + fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + + if(__errno) { + char buf[1024]; + fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + errno = 0; + } + else + fputc('\n', stderr); + + erl->last_logged = now; + erl->count = 0; + + log_unlock(); +} + +void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { // save a copy of errno - just in case this function generates a new error int __errno = errno; diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h index ac8fa7a53d..11dab4c1df 100644 --- a/libnetdata/log/log.h +++ b/libnetdata/log/log.h @@ -94,6 +94,16 @@ static inline void debug_dummy(void) {} void error_log_limit_reset(void); void error_log_limit_unlimited(void); +typedef struct error_with_limit { + time_t log_every; + size_t count; + time_t last_logged; + usec_t sleep_ut; +} ERROR_LIMIT; + +#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } +#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } + #ifdef NETDATA_INTERNAL_CHECKS #define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) @@ -107,6 +117,7 @@ void error_log_limit_unlimited(void); #define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) #define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) @@ -114,6 +125,7 @@ void send_statistics(const char *action, const char *action_result, const char * void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);; void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); void log_health( const char *fmt, ... ) PRINTFLIKE(1, 2); diff --git a/libnetdata/socket/socket.c b/libnetdata/socket/socket.c index 2f9d476662..4344d982a1 100644 --- a/libnetdata/socket/socket.c +++ b/libnetdata/socket/socket.c @@ -1547,8 +1547,9 @@ static int poll_process_new_tcp_connection(POLLJOB *p, POLLINFO *pi, struct poll debug(D_POLLFD, "POLLFD: LISTENER: accept4() slot %zu (fd %d) failed.", pi->slot, pf->fd); if(unlikely(errno == EMFILE)) { - error("POLLFD: LISTENER: too many open files - sleeping for 1ms - used by this thread %zu, max for this thread %zu", p->used, p->limit); - usleep(1000); // 1ms + error_limit_static_global_var(erl, 10, 1000); + error_limit(&erl, "POLLFD: LISTENER: too many open files - used by this thread %zu, max for this thread %zu", + p->used, p->limit); } else if(unlikely(errno != EWOULDBLOCK && errno != EAGAIN)) error("POLLFD: LISTENER: accept() failed."); |