summary | refs | log | tree | commit | diff | stats
path: root/collectors/apps.plugin
diff options
context:
space:
mode:
author: Costa Tsaousis <costa@netdata.cloud> 2023-07-20 23:27:20 +0300
committer: GitHub <noreply@github.com> 2023-07-20 23:27:20 +0300
commit: cc2de625739ef318361d90ed4ffa72a0a235405f (patch)
tree: daf1d760d5b4518e2438f16a4eeb8fa2a3327245 /collectors/apps.plugin
parent: 5607d21c025bf79aa80fccb5ad07842a31066aef (diff)
apps.plugin fds limits improvements (#15467)
prevent wrong log about limits; log when the percentage of open fds is above limits
Diffstat (limited to 'collectors/apps.plugin')
-rw-r--r-- collectors/apps.plugin/apps_plugin.c 58
1 files changed, 51 insertions, 7 deletions
diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c
index 105c54262a..d24b275d4a 100644
--- a/collectors/apps.plugin/apps_plugin.c
+++ b/collectors/apps.plugin/apps_plugin.c
@@ -144,12 +144,13 @@ static const char *proc_states[] = {
// log each problem once per process
// log flood protection flags (log_thrown)
typedef enum __attribute__((packed)) {
- PID_LOG_IO = (1 << 0),
- PID_LOG_STATUS = (1 << 1),
- PID_LOG_CMDLINE = (1 << 2),
- PID_LOG_FDS = (1 << 3),
- PID_LOG_STAT = (1 << 4),
- PID_LOG_LIMITS = (1 << 5),
+ PID_LOG_IO = (1 << 0),
+ PID_LOG_STATUS = (1 << 1),
+ PID_LOG_CMDLINE = (1 << 2),
+ PID_LOG_FDS = (1 << 3),
+ PID_LOG_STAT = (1 << 4),
+ PID_LOG_LIMITS = (1 << 5),
+ PID_LOG_LIMITS_DETAIL = (1 << 6),
} PID_LOG;
static int
@@ -1373,11 +1374,16 @@ static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) {
#else
static char proc_pid_limits_buffer[MAX_PROC_PID_LIMITS + 1];
int ret = 0;
+ bool read_limits = false;
+
+ errno = 0;
kernel_uint_t all_fds = pid_openfds_sum(p);
- if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC)
+ if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) {
// too frequent, we want to collect limits once per minute
+ ret = 1;
goto cleanup;
+ }
if(unlikely(!p->limits_filename)) {
char filename[FILENAME_MAX + 1];
@@ -1394,6 +1400,7 @@ static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) {
if(bytes <= 0)
goto cleanup;
+ read_limits = true;
p->limits.max_open_files = get_proc_pid_limits_limit(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY, sizeof(PROC_PID_LIMITS_MAX_OPEN_FILES_KEY) - 1, 0);
p->last_limits_collected_usec = p->io_collected_usec;
@@ -1405,6 +1412,43 @@ cleanup:
else
p->openfds_limits_percent = 0.0;
+ if(p->openfds_limits_percent > 100.0) {
+ if(!(p->log_thrown & PID_LOG_LIMITS_DETAIL)) {
+ netdata_log_info(
+ "FDS_LIMITS: PID %d (%s) is using "
+ "%0.2d %% of its fds limits, "
+ "open fds = %llu ("
+ "files = %llu, "
+ "pipes = %llu, "
+ "sockets = %llu, "
+ "inotifies = %llu, "
+ "eventfds = %llu, "
+ "timerfds = %llu, "
+ "signalfds = %llu, "
+ "eventpolls = %llu "
+ "other = %llu "
+ "), open fds limit = %llu, "
+ "%s",
+ p->pid, p->comm, p->openfds_limits_percent, all_fds,
+ p->openfds.files,
+ p->openfds.pipes,
+ p->openfds.sockets,
+ p->openfds.inotifies,
+ p->openfds.eventfds,
+ p->openfds.timerfds,
+ p->openfds.signalfds,
+ p->openfds.eventpolls,
+ p->openfds.other,
+ p->limits.max_open_files,
+ read_limits ? "and we have read the limits AFTER counting the fds"
+ : "but we have read the limits BEFORE counting the fds");
+
+ p->log_thrown |= PID_LOG_LIMITS_DETAIL;
+ }
+ }
+ else
+ p->log_thrown &= ~PID_LOG_LIMITS_DETAIL;
+
return ret;
#endif
}