summary refs log tree commit diff stats
path: root/collectors
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2023-07-26 01:06:57 +0300
committer GitHub <noreply@github.com> 2023-07-26 01:06:57 +0300
commit 065091c3f5d908c5e06ed5ac817999f7dc965b90 (patch)
tree 70e29c0d74eaf33f69e9e393732d3d051fd3da5d /collectors
parent accc426c8ab08a75c9e6470a4a89482fb4c4eb17 (diff)
proc integrations (#15494)
Co-authored-by: ilyam8 <ilya@netdata.cloud>
Diffstat (limited to 'collectors')
-rw-r--r-- collectors/all.h 46
-rw-r--r-- collectors/cgroups.plugin/sys_fs_cgroup.c 164
-rw-r--r-- collectors/debugfs.plugin/debugfs_zswap.c 28
-rw-r--r-- collectors/ebpf.plugin/ebpf_swap.c 8
-rw-r--r-- collectors/freebsd.plugin/freebsd_sysctl.c 4
-rw-r--r-- collectors/proc.plugin/multi_metadata.yaml 412
-rw-r--r-- collectors/proc.plugin/proc_meminfo.c 520
-rw-r--r-- collectors/proc.plugin/proc_pressure.c 137
-rw-r--r-- collectors/proc.plugin/proc_pressure.h 3
-rw-r--r-- collectors/proc.plugin/proc_stat.c 2
-rw-r--r-- collectors/proc.plugin/proc_vmstat.c 12
11 files changed, 1086 insertions, 250 deletions
diff --git a/collectors/all.h b/collectors/all.h
index f28865c019..9bfae914ea 100644
--- a/collectors/all.h
+++ b/collectors/all.h
@@ -23,18 +23,6 @@
#define NETDATA_CHART_PRIO_SYSTEM_IO 150
#define NETDATA_CHART_PRIO_SYSTEM_PGPGIO 151
#define NETDATA_CHART_PRIO_SYSTEM_RAM 200
-#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201
-#define NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS 202
-#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO 300
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO 301
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE 302
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE 303
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS 304
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT 305
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES 306
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE 307
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY 308
#define NETDATA_CHART_PRIO_SYSTEM_NET 500
#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only
#define NETDATA_CHART_PRIO_SYSTEM_IP 501
@@ -87,17 +75,35 @@
#define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE 1010
#define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL 1020
#define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED 1030
-#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1040
+#define NETDATA_CHART_PRIO_MEM_SWAP 1035
+#define NETDATA_CHART_PRIO_MEM_SWAP_CALLS 1037
+#define NETDATA_CHART_PRIO_MEM_SWAPIO 1038
+#define NETDATA_CHART_PRIO_MEM_ZSWAP 1036
+#define NETDATA_CHART_PRIO_MEM_ZSWAPIO 1037
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO 1038
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE 1039
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE 1040
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS 1041
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT 1042
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES 1043
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE 1044
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY 1045
+#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS 1050
#define NETDATA_CHART_PRIO_MEM_KERNEL 1100
#define NETDATA_CHART_PRIO_MEM_SLAB 1200
+#define NETDATA_CHART_PRIO_MEM_RECLAIMING 1210
+#define NETDATA_CHART_PRIO_MEM_HIGH_LOW 1211
+#define NETDATA_CHART_PRIO_MEM_CMA 1212
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1251
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1252
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1253
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1254
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1255
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1256
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1257
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS 1251
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1252
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1253
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1254
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1255
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1256
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1257
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1258
+#define NETDATA_CHART_PRIO_MEM_DIRECTMAP 1260
#define NETDATA_CHART_PRIO_MEM_KSM 1300
#define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301
#define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index fb805e63ce..b7a6a46485 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -62,6 +62,8 @@ static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
+static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
@@ -828,6 +830,7 @@ struct cgroup {
struct pressure cpu_pressure;
struct pressure io_pressure;
struct pressure memory_pressure;
+ struct pressure irq_pressure;
// per cgroup charts
RRDSET *st_cpu;
@@ -1451,28 +1454,33 @@ static inline void cgroup2_read_pressure(struct pressure *res) {
return;
}
- res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
- res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
- res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
- res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms
+ bool did_some = false, did_full = false;
- if (lines > 2) {
- res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
- res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
- res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
- res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms
+ for(size_t l = 0; l < lines ;l++) {
+ const char *key = procfile_lineword(ff, l, 0);
+ if(strcmp(key, "some") == 0) {
+ res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
+ res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
+ res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
+ res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
+ did_some = true;
+ }
+ else if(strcmp(key, "full") == 0) {
+ res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
+ res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
+ res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
+ res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
+ did_full = true;
+ }
}
- res->updated = 1;
+ res->updated = (did_full || did_some) ? 1 : 0;
- if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
- res->some.enabled = CONFIG_BOOLEAN_YES;
- if (lines > 2) {
- res->full.enabled = CONFIG_BOOLEAN_YES;
- } else {
- res->full.enabled = CONFIG_BOOLEAN_NO;
- }
- }
+ if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO))
+ res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
+
+ if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO))
+ res->full.enabled = (did_full) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
}
}
@@ -1637,6 +1645,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup2_read_pressure(&cg->cpu_pressure);
cgroup2_read_pressure(&cg->io_pressure);
cgroup2_read_pressure(&cg->memory_pressure);
+ cgroup2_read_pressure(&cg->irq_pressure);
cgroup_read_memory(&cg->memory, 1);
}
}
@@ -1851,6 +1860,7 @@ static inline void cgroup_free(struct cgroup *cg) {
free_pressure(&cg->cpu_pressure);
free_pressure(&cg->io_pressure);
free_pressure(&cg->memory_pressure);
+ free_pressure(&cg->irq_pressure);
freez(cg->id);
freez(cg->intermediate_id);
@@ -2465,6 +2475,18 @@ static inline void discovery_update_filenames() {
netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
}
}
+
+ if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
+ snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
+ if (likely(stat(filename, &buf) != -1)) {
+ cg->irq_pressure.filename = strdupz(filename);
+ cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
+ cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
+ netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename);
+ } else {
+ netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
+ }
+ }
}
}
}
@@ -4643,6 +4665,112 @@ void update_cgroup_charts(int update_every) {
update_pressure_charts(pcs);
}
+ res = &cg->irq_pressure;
+
+ if (likely(res->updated && res->some.enabled)) {
+ struct pressure_charts *pcs;
+ pcs = &res->some;
+
+ if (unlikely(!pcs->share_time.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure");
+ chart = pcs->share_time.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "irq_some_pressure"
+ , NULL
+ , "interrupts"
+ , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2310
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+ rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
+ pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ if (unlikely(!pcs->total_time.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time");
+ chart = pcs->total_time.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "irq_some_pressure_stall_time"
+ , NULL
+ , "interrupts"
+ , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"
+ , title
+ , "ms"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2330
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+ rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
+ pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ update_pressure_charts(pcs);
+ }
+
+ if (likely(res->updated && res->full.enabled)) {
+ struct pressure_charts *pcs;
+ pcs = &res->full;
+
+ if (unlikely(!pcs->share_time.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure");
+
+ chart = pcs->share_time.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "irq_full_pressure"
+ , NULL
+ , "interrupts"
+ , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"
+ , title
+ , "percentage"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2350
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
+ pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ if (unlikely(!pcs->total_time.st)) {
+ RRDSET *chart;
+ snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time");
+ chart = pcs->total_time.st = rrdset_create_localhost(
+ cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+ , "irq_full_pressure_stall_time"
+ , NULL
+ , "interrupts"
+ , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"
+ , title
+ , "ms"
+ , PLUGIN_CGROUPS_NAME
+ , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+ , cgroup_containers_chart_priority + 2370
+ , update_every
+ , RRDSET_TYPE_LINE
+ );
+ rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
+ pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ update_pressure_charts(pcs);
+ }
+
res = &cg->io_pressure;
if (likely(res->updated && res->some.enabled)) {
diff --git a/collectors/debugfs.plugin/debugfs_zswap.c b/collectors/debugfs.plugin/debugfs_zswap.c
index c8fc0f030a..502a04f1f8 100644
--- a/collectors/debugfs.plugin/debugfs_zswap.c
+++ b/collectors/debugfs.plugin/debugfs_zswap.c
@@ -38,7 +38,7 @@ static struct netdata_zswap_metric zswap_calculated_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO,
.divisor = 100,
.convertv = NULL,
.value = -1},
@@ -71,7 +71,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -84,7 +84,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@@ -97,7 +97,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -110,7 +110,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@@ -123,7 +123,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_AREA,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE,
.divisor = 1,
.convertv = pages_to_bytes,
.value = -1},
@@ -136,7 +136,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
.charttype = RRDSET_TYPE_LINE,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -175,7 +175,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -188,7 +188,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -201,7 +201,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -214,7 +214,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -227,7 +227,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
.charttype = RRDSET_TYPE_STACKED,
.enabled = CONFIG_BOOLEAN_YES,
.chart_created = CONFIG_BOOLEAN_NO,
- .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+ .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
.divisor = 1,
.convertv = NULL,
.value = -1},
@@ -266,7 +266,7 @@ zswap_send_chart(struct netdata_zswap_metric *metric, int update_every, const ch
{
fprintf(
stdout,
- "CHART system.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
+ "CHART mem.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
metric->chart_id,
metric->title,
metric->units,
@@ -291,7 +291,7 @@ static void zswap_send_dimension(struct netdata_zswap_metric *metric)
static void zswap_send_begin(struct netdata_zswap_metric *metric)
{
- fprintf(stdout, "BEGIN system.zswap_%s\n", metric->chart_id);
+ fprintf(stdout, "BEGIN mem.zswap_%s\n", metric->chart_id);
}
static void zswap_send_set(struct netdata_zswap_metric *metric)
diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c
index ff74ee842e..7b1dad5ecb 100644
--- a/collectors/ebpf.plugin/ebpf_swap.c
+++ b/collectors/ebpf.plugin/ebpf_swap.c
@@ -322,13 +322,13 @@ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em)
*/
static void ebpf_obsolete_swap_global(ebpf_module_t *em)
{
- ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
+ ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_MEM_SWAP_CHART,
"Calls to access swap memory",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
NULL,
- NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
+ NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
em->update_every);
}
@@ -914,12 +914,12 @@ static void ebpf_swap_allocate_global_vectors(int apps)
*/
static void ebpf_create_swap_charts(int update_every)
{
- ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART,
+ ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART,
"Calls to access swap memory",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
NULL,
NETDATA_EBPF_CHART_TYPE_LINE,
- NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
+ NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
ebpf_create_global_dimension,
swap_publish_aggregated, NETDATA_SWAP_END,
update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c
index c8aa5dad54..84289aafa8 100644
--- a/collectors/freebsd.plugin/freebsd_sysctl.c
+++ b/collectors/freebsd.plugin/freebsd_sysctl.c
@@ -1035,7 +1035,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
if (unlikely(!st)) {
st = rrdset_create_localhost(
- "system",
+ "mem",
"swapio",
NULL,
"swap",
@@ -1044,7 +1044,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
"KiB/s",
"freebsd.plugin",
"vm.stats.vm.v_swappgs",
- NETDATA_CHART_PRIO_SYSTEM_SWAPIO,
+ NETDATA_CHART_PRIO_MEM_SWAPIO,
update_every,
RRDSET_TYPE_AREA
);
diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/multi_metadata.yaml
index 04e66ddfe6..bdbce89f98 100644
--- a/collectors/proc.plugin/multi_metadata.yaml
+++ b/collectors/proc.plugin/multi_metadata.yaml
@@ -20,27 +20,50 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides a collection of statistics about the system such as CPU utilization, process counts and more."
+ metrics_description: |
+ CPU utilization, states, and frequencies, along with key Linux system performance metrics.
+
+ The `/proc/stat` file provides various types of system statistics:
+
+ - The overall system CPU usage statistics
+ - Per CPU core statistics
+ - The total context switching of the system
+ - The total number of processes running
+ - The total CPU interrupts
+ - The total CPU softirqs
+
+ The collector also reads:
+
+ - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
+ - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
auto_detection:
- description: ""
+ description: |
+ The collector auto-detects all metrics. No configuration is needed.
limits:
description: ""
performance_impact:
- description: ""
+ description: |
+ The collector disables CPU frequency and idle state monitoring when there are more than 128 CPU cores available.
setup:
prerequisites:
list: []
configuration:
file:
- name: ""
+ section_name: "plugin:proc:/proc/stat"
+ name: "netdata.conf"
description: ""
options:
description: ""
@@ -187,12 +210,43 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: ""
+ metrics_description: |
+ Entropy, a measure of the randomness or unpredictability of data.
+
+ In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
+ secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
+ vulnerable to attacks that exploit the predictability of the generated keys.
+
+ In most operating systems, entropy is generated by collecting random events from various sources, such as
+ hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
+ of entropy, which is then used to generate random numbers when needed.
+
+ The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
+ to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
+ which blocks until enough entropy is available to generate the requested numbers. This ensures that the
+ generated numbers are truly random and not predictable.
+
+ However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
+ programs that rely on random numbers to slow down or even freeze. This is especially problematic for
+ cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
+
+ To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
+ entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
+ radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
+ software-based sources.
+
+ One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
+ for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
+ high-quality entropy, which can be used to seed the pool of entropy in the operating system.
+
+ Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
+ exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
+ can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -264,12 +318,23 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides the amount of time the system has been up (running)."
+ metrics_description: |
+ The amount of time the system has been up (running).
+
+ Uptime is a critical aspect of overall system performance:
+
+ - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
+ - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
+ - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
+ - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
+ - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
+ - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
+ - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -340,12 +405,33 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides information about process, memory, swap space utilization and many more."
+ metrics_description: |
+ Linux virtual memory subsystem.
+
+ Information about memory management, indicating how effectively the kernel allocates and frees
+ memory resources in response to system demands.
+
+ Monitors page faults, which occur when a process requests a portion of its memory that isn't
+ immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
+ provide insights into application behavior.
+
+ Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
+ swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
+ a compressed cache for swap pages, and provides insights into its usage and performance implications.
+
+ In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
+ memory resources between host and guest systems.
+
+ For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
+ can impact the performance based on the memory access times.
+
+ The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
+ of memory resources.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -535,7 +621,37 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor Interrupts metrics for efficient processor interrupt handling."
+ metrics_description: |
+ Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
+ The numbers reported are the counts of the interrupts that have occurred of each type.
+
+ An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
+ immediate attention. The processor then interrupts its current activities and executes the interrupt handler
+ to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
+
+ The types of interrupts include:
+
+ - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
+ you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
+
+ - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
+ used to switch the CPU among different tasks.
+
+ - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
+
+ - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
+
+ Monitoring `/proc/interrupts` can be used for:
+
+ - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
+ configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
+ performance degradation.
+
+ - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
+
+ - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
+ understand what your system is doing. It can provide insights into the system's interaction with hardware,
+ drivers, and other parts of the kernel.
method_description: ""
supported_platforms:</