summary | refs | log | tree | commit | diff | stats
path: root/collectors
diff options
context:
space:
mode:
author: Vladimir Kobal <vlad@prokk.net>, 2022-10-21 14:38:36 +0200
committer: GitHub <noreply@github.com>, 2022-10-21 14:38:36 +0200
commit: d02695ea6915d8d7a4ec7a5dde73f6dd18ae3bc1 (patch)
tree: 50a9f7e509050bae37a14b7f37417a07db9a3db5 /collectors
parent: 12671ae84859cb886e4875d15ecf4daa5d3bf1bc (diff)
Retry reading carrier, duplex, and speed files periodically (#13850)
* Retry reading carrier file periodically
* Fix formatting type
* Don't log error messages on every iteration for duplex and speed
Diffstat (limited to 'collectors')
-rw-r--r-- collectors/proc.plugin/proc_net_dev.c 74
1 files changed, 56 insertions, 18 deletions
diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c
index 701cc00ea7..dc6c71e97f 100644
--- a/collectors/proc.plugin/proc_net_dev.c
+++ b/collectors/proc.plugin/proc_net_dev.c
@@ -7,6 +7,8 @@
#define STATE_LENGTH_MAX 32
+#define READ_RETRY_PERIOD 60 // seconds
+
enum {
NETDEV_DUPLEX_UNKNOWN,
NETDEV_DUPLEX_HALF,
@@ -55,6 +57,15 @@ static struct netdev {
int configured;
int enabled;
int updated;
+
+ int carrier_file_exists;
+ time_t carrier_file_lost_time;
+
+ int duplex_file_exists;
+ time_t duplex_file_lost_time;
+
+ int speed_file_exists;
+ time_t speed_file_lost_time;
int do_bandwidth;
int do_packets;
@@ -859,21 +870,37 @@ int do_proc_net_dev(int update_every, usec_t dt) {
if ((d->do_carrier != CONFIG_BOOLEAN_NO ||
d->do_duplex != CONFIG_BOOLEAN_NO ||
d->do_speed != CONFIG_BOOLEAN_NO) &&
- d->filename_carrier) {
+ d->filename_carrier &&
+ (d->carrier_file_exists ||
+ now_monotonic_sec() - d->carrier_file_lost_time > READ_RETRY_PERIOD)) {
if (read_single_number_file(d->filename_carrier, &d->carrier)) {
- error("Cannot refresh interface %s carrier state by reading '%s'. Stop updating it.", d->name, d->filename_carrier);
- freez(d->filename_carrier);
- d->filename_carrier = NULL;
+ if (d->carrier_file_exists)
+ error(
+ "Cannot refresh interface %s carrier state by reading '%s'. Next update is in %d seconds.",
+ d->name,
+ d->filename_carrier,
+ READ_RETRY_PERIOD);
+ d->carrier_file_exists = 0;
+ d->carrier_file_lost_time = now_monotonic_sec();
+ } else {
+ d->carrier_file_exists = 1;
+ d->carrier_file_lost_time = 0;
}
}
- if (d->do_duplex != CONFIG_BOOLEAN_NO && d->filename_duplex && (d->carrier || !d->filename_carrier)) {
+ if (d->do_duplex != CONFIG_BOOLEAN_NO &&
+ d->filename_duplex &&
+ (d->carrier || d->carrier_file_exists) &&
+ (d->duplex_file_exists ||
+ now_monotonic_sec() - d->duplex_file_lost_time > READ_RETRY_PERIOD)) {
char buffer[STATE_LENGTH_MAX + 1];
if (read_file(d->filename_duplex, buffer, STATE_LENGTH_MAX)) {
- error("Cannot refresh interface %s duplex state by reading '%s'. I will stop updating it.", d->name, d->filename_duplex);
- freez(d->filename_duplex);
- d->filename_duplex = NULL;
+ if (d->duplex_file_exists)
+ error("Cannot refresh interface %s duplex state by reading '%s'.", d->name, d->filename_duplex);
+ d->duplex_file_exists = 0;
+ d->duplex_file_lost_time = now_monotonic_sec();
+ d->duplex = NETDEV_DUPLEX_UNKNOWN;
} else {
// values can be unknown, half or full -- just check the first letter for speed
if (buffer[0] == 'f')
@@ -882,9 +909,11 @@ int do_proc_net_dev(int update_every, usec_t dt) {
d->duplex = NETDEV_DUPLEX_HALF;
else
d->duplex = NETDEV_DUPLEX_UNKNOWN;
+ d->duplex_file_exists = 1;
+ d->duplex_file_lost_time = 0;
}
} else {
- d->duplex = 0;
+ d->duplex = NETDEV_DUPLEX_UNKNOWN;
}
if(d->do_operstate != CONFIG_BOOLEAN_NO && d->filename_operstate) {
@@ -904,7 +933,8 @@ int do_proc_net_dev(int update_every, usec_t dt) {
if (d->do_mtu != CONFIG_BOOLEAN_NO && d->filename_mtu) {
if (read_single_number_file(d->filename_mtu, &d->mtu)) {
- error("Cannot refresh mtu for interface %s by reading '%s'. Stop updating it.", d->name, d->filename_mtu);
+ error(
+ "Cannot refresh mtu for interface %s by reading '%s'. Stop updating it.", d->name, d->filename_mtu);
freez(d->filename_mtu);
d->filename_mtu = NULL;
}
@@ -970,25 +1000,29 @@ int do_proc_net_dev(int update_every, usec_t dt) {
d->chart_var_speed =
rrdsetvar_custom_chart_variable_add_and_acquire(d->st_bandwidth, "nic_speed_max");
if(!d->chart_var_speed) {
- error("Cannot create interface %s chart variable 'nic_speed_max'. Will not update its speed anymore.", d->name);
+ error(
+ "Cannot create interface %s chart variable 'nic_speed_max'. Will not update its speed anymore.",
+ d->name);
freez(d->filename_speed);
d->filename_speed = NULL;
}
}
- if(d->filename_speed && d->chart_var_speed) {
+ if (d->filename_speed && d->chart_var_speed) {
int ret = 0;
- if (d->carrier || !d->filename_carrier) {
+ if ((d->carrier || d->carrier_file_exists) &&
+ (d->speed_file_exists || now_monotonic_sec() - d->speed_file_lost_time > READ_RETRY_PERIOD)) {
ret = read_single_number_file(d->filename_speed, (unsigned long long *) &d->speed);
} else {
d->speed = 0;
}
if(ret) {
- error("Cannot refresh interface %s speed by reading '%s'. Will not update its speed anymore.", d->name, d->filename_speed);
- freez(d->filename_speed);
- d->filename_speed = NULL;
+ if (d->speed_file_exists)
+ error("Cannot refresh interface %s speed by reading '%s'.", d->name, d->filename_speed);
+ d->speed_file_exists = 0;
+ d->speed_file_lost_time = now_monotonic_sec();
}
else {
if(d->do_speed != CONFIG_BOOLEAN_NO) {
@@ -1020,7 +1054,11 @@ int do_proc_net_dev(int update_every, usec_t dt) {
rrdset_done(d->st_speed);
}
- rrdsetvar_custom_chart_variable_set(d->st_bandwidth, d->chart_var_speed, (NETDATA_DOUBLE) d->speed * KILOBITS_IN_A_MEGABIT);
+ rrdsetvar_custom_chart_variable_set(
+ d->st_bandwidth, d->chart_var_speed, (NETDATA_DOUBLE)d->speed * KILOBITS_IN_A_MEGABIT);
+
+ d->speed_file_exists = 1;
+ d->speed_file_lost_time = 0;
}
}
}
@@ -1106,7 +1144,7 @@ int do_proc_net_dev(int update_every, usec_t dt) {
// --------------------------------------------------------------------
- if(d->do_carrier != CONFIG_BOOLEAN_NO && d->filename_carrier) {
+ if(d->do_carrier != CONFIG_BOOLEAN_NO && d->carrier_file_exists) {
if(unlikely(!d->st_carrier)) {
d->st_carrier = rrdset_create_localhost(
d->chart_type_net_carrier