diff options
author | thiagoftsm <thiagoftsm@gmail.com> | 2023-01-25 19:04:07 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-25 19:04:07 +0000 |
commit | 3e3ff4bee83363dca7cfb838baf1cf316960ed1b (patch) | |
tree | c62e0f721eb67856aeb7043ba4326af38159c850 /libnetdata | |
parent | bf38a22f323be8fa2b35bf782068d903064530ed (diff) |
Add Collector log (#14309)
Diffstat (limited to 'libnetdata')
-rw-r--r-- | libnetdata/log/log.c | 92 | ||||
-rw-r--r-- | libnetdata/log/log.h | 19 | ||||
-rw-r--r-- | libnetdata/procfile/procfile.c | 9 | ||||
-rw-r--r-- | libnetdata/procfile/procfile.h | 5 |
4 files changed, 80 insertions, 45 deletions
diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c index bbf98d6693..06f2397582 100644 --- a/libnetdata/log/log.c +++ b/libnetdata/log/log.c @@ -14,6 +14,7 @@ uint64_t debug_flags = 0; int access_log_syslog = 1; int error_log_syslog = 1; +int collector_log_syslog = 1; int output_log_syslog = 1; // debug log int health_log_syslog = 1; @@ -23,11 +24,15 @@ FILE *stdaccess = NULL; int stdhealth_fd = -1; FILE *stdhealth = NULL; +int stdcollector_fd = -1; +FILE *stderror = NULL; + const char *stdaccess_filename = NULL; const char *stderr_filename = NULL; const char *stdout_filename = NULL; const char *facility_log = NULL; const char *stdhealth_filename = NULL; +const char *stdcollector_filename = NULL; #ifdef ENABLE_ACLK const char *aclklog_filename = NULL; @@ -573,8 +578,14 @@ void reopen_all_log_files() { if(stdout_filename) open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - if(stderr_filename) - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + if(stdcollector_filename) + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + if(stderr_filename) { + log_lock(); + stderror = open_log_file(stdcollector_fd, stderror, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); + } #ifdef ENABLE_ACLK if (aclklog_enabled) @@ -593,7 +604,11 @@ void open_all_log_files() { open_log_file(STDIN_FILENO, stdin, "/dev/null", NULL, 0, NULL); open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + log_lock(); + stderror = open_log_file(stdcollector_fd, NULL, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); #ifdef ENABLE_ACLK if(aclklog_enabled) @@ -616,7 +631,7 @@ int error_log_limit(int reset) { static time_t start = 0; 
static unsigned long counter = 0, prevented = 0; - // fprintf(stderr, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); + // fprintf(stderror, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); // do not throttle if the period is 0 if(error_log_throttle_period == 0) @@ -638,7 +653,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + stderror, "%s: %s LOG FLOOD PROTECTION reset for process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -661,7 +676,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + stderror, "%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -685,7 +700,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + stderror, "%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs " "in %"PRId64" seconds). Preventing more logs from process '%s' for %"PRId64" seconds.\n", date, @@ -758,9 +773,10 @@ void debug_int( const char *file, const char *function, const unsigned long line // ---------------------------------------------------------------------------- // info log -void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) +void info_int( int is_collector, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... 
) { va_list args; + FILE *fp = (is_collector) ? stderr : stderror; log_lock(); @@ -770,7 +786,7 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_INFO, fmt, args ); va_end( args ); @@ -781,14 +797,15 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); + fprintf(fp, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -842,7 +859,7 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -853,26 +870,29 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(stderror, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(stderror, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(stderror, fmt, args ); 
va_end( args ); if(erl->count > 1) - fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + fprintf(stderror, " (similar messages repeated %zu times in the last %llu secs)", + erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); if(erl->sleep_ut) - fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + fprintf(stderror, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(stderror, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', stderror); erl->last_logged = now; erl->count = 0; @@ -880,9 +900,10 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma log_unlock(); } -void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { +void error_int(int is_collector, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { // save a copy of errno - just in case this function generates a new error int __errno = errno; + FILE *fp = (is_collector) ? 
stderr : stderror; va_list args; @@ -894,7 +915,7 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -905,20 +926,22 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(fp, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(fp, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -938,7 +961,7 @@ static void print_call_stack(void) { nptrs = backtrace(buffer, BT_BUF_SIZE); if(nptrs) - backtrace_symbols_fd(buffer, nptrs, fileno(stderr)); + backtrace_symbols_fd(buffer, nptrs, fileno(stderror)); } #endif @@ -949,7 +972,7 @@ void fatal_int( const char *file, const char *function, const unsigned long line const char *thread_tag; char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_CRIT, fmt, args ); va_end( args ); @@ -970,15 +993,16 @@ void fatal_int( const char *file, const char *function, const unsigned long line va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: 
%s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); + fprintf(stderror, + "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); #else - fprintf(stderr, "%s: %s FATAL : %s : ", date, program_name, thread_tag); + fprintf(stderror, "%s: %s FATAL : %s : ", date, program_name, thread_tag); #endif - vfprintf( stderr, fmt, args ); + vfprintf(stderror, fmt, args ); va_end( args ); perror(" # "); - fputc('\n', stderr); + fputc('\n', stderror); log_unlock(); diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h index 11dab4c1df..3d9f0927db 100644 --- a/libnetdata/log/log.h +++ b/libnetdata/log/log.h @@ -61,10 +61,14 @@ extern FILE *stdaccess; extern int stdhealth_fd; extern FILE *stdhealth; +extern int stdcollector_fd; +extern FILE *stderror; + extern const char *stdaccess_filename; extern const char *stderr_filename; extern const char *stdout_filename; extern const char *stdhealth_filename; +extern const char *stdcollector_filename; extern const char *facility_log; #ifdef ENABLE_ACLK @@ -106,7 +110,7 @@ typedef struct error_with_limit { #ifdef NETDATA_INTERNAL_CHECKS #define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) -#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_error(condition, args...) do { if(unlikely(condition)) error_int(0, "IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #else #define debug(type, args...) debug_dummy() @@ -114,17 +118,20 @@ typedef struct error_with_limit { #define internal_fatal(args...) debug_dummy() #endif -#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) -#define infoerr(args...) 
error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) -#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define info(args...) info_int(0, __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_info(args...) info_int(1, __FILE__, __FUNCTION__, __LINE__, ##args) +#define infoerr(args...) error_int(0, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error(args...) error_int(0, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_infoerr(args...) error_int(1, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_error(args...) error_int(1, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) void send_statistics(const char *action, const char *action_result, const char *action_data); void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void info_int( int is_collector, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_int( int is_collector, const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(6, 7); void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... 
) PRINTFLIKE(6, 7);; void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); diff --git a/libnetdata/procfile/procfile.c b/libnetdata/procfile/procfile.c index eb04316c30..cdf0f97239 100644 --- a/libnetdata/procfile/procfile.c +++ b/libnetdata/procfile/procfile.c @@ -296,7 +296,8 @@ procfile *procfile_readall(procfile *ff) { debug(D_PROCFILE, "Reading file '%s', from position %zd with length %zd", procfile_filename(ff), s, (ssize_t)(ff->size - s)); r = read(ff->fd, &ff->data[s], ff->size - s); if(unlikely(r == -1)) { - if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); + else if(unlikely(ff->flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); procfile_close(ff); return NULL; } @@ -306,7 +307,8 @@ procfile *procfile_readall(procfile *ff) { // debug(D_PROCFILE, "Rewinding file '%s'", ff->filename); if(unlikely(lseek(ff->fd, 0, SEEK_SET) == -1)) { - if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); + else if(unlikely(ff->flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); procfile_close(ff); return NULL; } @@ -403,7 +405,8 @@ procfile *procfile_open(const char *filename, const char *separators, uint32_t f int fd = open(filename, procfile_open_flags, 0666); if(unlikely(fd == -1)) { - if(unlikely(!(flags & 
PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot open file '%s'", filename); + if(unlikely(!(flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot open file '%s'", filename); + else if(unlikely(flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot open file '%s'", filename); return NULL; } diff --git a/libnetdata/procfile/procfile.h b/libnetdata/procfile/procfile.h index cae4ad4846..8db5b45f41 100644 --- a/libnetdata/procfile/procfile.h +++ b/libnetdata/procfile/procfile.h @@ -34,8 +34,9 @@ typedef struct { // ---------------------------------------------------------------------------- // The procfile -#define PROCFILE_FLAG_DEFAULT 0x00000000 -#define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001 +#define PROCFILE_FLAG_DEFAULT 0x00000000 // To store inside `collector.log` +#define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001 // Do not store anything +#define PROCFILE_FLAG_ERROR_ON_ERROR_LOG 0x00000002 // Store inside `error.log` typedef enum __attribute__ ((__packed__)) procfile_separator { PF_CHAR_IS_SEPARATOR, |