proc integrations (#15494)

Co-authored-by: ilyam8 <ilya@netdata.cloud>
author: Costa Tsaousis <costa@netdata.cloud> 2023-07-26 01:06:57 +0300
committer: GitHub <noreply@github.com> 2023-07-26 01:06:57 +0300
commit: 065091c3f5d908c5e06ed5ac817999f7dc965b90 (patch)
tree: 70e29c0d74eaf33f69e9e393732d3d051fd3da5d /collectors
parent: accc426c8ab08a75c9e6470a4a89482fb4c4eb17 (diff)
11 files changed, 1086 insertions, 250 deletions
diff --git a/collectors/all.h b/collectors/all.h
index f28865c019..9bfae914ea 100644
--- a/collectors/all.h
+++ b/collectors/all.h
@@ -23,18 +23,6 @@
 #define NETDATA_CHART_PRIO_SYSTEM_IO                    150
 #define NETDATA_CHART_PRIO_SYSTEM_PGPGIO                151
 #define NETDATA_CHART_PRIO_SYSTEM_RAM                   200
-#define NETDATA_CHART_PRIO_SYSTEM_SWAP                  201
-#define NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS            202
-#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO                250
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO               300
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO  301
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE   302
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE     303
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS         304
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT    305
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES  306
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE  307
-#define NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY      308
 #define NETDATA_CHART_PRIO_SYSTEM_NET                   500
 #define NETDATA_CHART_PRIO_SYSTEM_IPV4                  500 // freebsd only
 #define NETDATA_CHART_PRIO_SYSTEM_IP                    501
@@ -87,17 +75,35 @@
 #define NETDATA_CHART_PRIO_MEM_SYSTEM_AVAILABLE       1010
 #define NETDATA_CHART_PRIO_MEM_SYSTEM_OOM_KILL        1020
 #define NETDATA_CHART_PRIO_MEM_SYSTEM_COMMITTED       1030
-#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS        1040
+#define NETDATA_CHART_PRIO_MEM_SWAP                   1035
+#define NETDATA_CHART_PRIO_MEM_SWAP_CALLS             1037
+#define NETDATA_CHART_PRIO_MEM_SWAPIO                 1038
+#define NETDATA_CHART_PRIO_MEM_ZSWAP                  1036
+#define NETDATA_CHART_PRIO_MEM_ZSWAPIO                1037
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO   1038
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE    1039
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE      1040
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS          1041
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT     1042
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES   1043
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE   1044
+#define NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY       1045
+#define NETDATA_CHART_PRIO_MEM_SYSTEM_PGFAULTS        1050
 #define NETDATA_CHART_PRIO_MEM_KERNEL                 1100
 #define NETDATA_CHART_PRIO_MEM_SLAB                   1200
+#define NETDATA_CHART_PRIO_MEM_RECLAIMING             1210
+#define NETDATA_CHART_PRIO_MEM_HIGH_LOW               1211
+#define NETDATA_CHART_PRIO_MEM_CMA                    1212
 #define NETDATA_CHART_PRIO_MEM_HUGEPAGES              1250
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS       1251
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE         1252
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO         1253
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED   1254
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS       1255
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT      1256
-#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT      1257
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_DETAILS      1251
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS       1252
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE         1253
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO         1254
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED   1255
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS       1256
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT      1257
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT      1258
+#define NETDATA_CHART_PRIO_MEM_DIRECTMAP              1260
 #define NETDATA_CHART_PRIO_MEM_KSM                    1300
 #define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS            1301
 #define NETDATA_CHART_PRIO_MEM_KSM_RATIOS             1302
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index fb805e63ce..b7a6a46485 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -62,6 +62,8 @@ static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
 static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
 static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
 static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
+static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
+static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
 
 static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
 static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
@@ -828,6 +830,7 @@ struct cgroup {
     struct pressure cpu_pressure;
     struct pressure io_pressure;
     struct pressure memory_pressure;
+    struct pressure irq_pressure;
 
     // per cgroup charts
     RRDSET *st_cpu;
@@ -1451,28 +1454,33 @@ static inline void cgroup2_read_pressure(struct pressure *res) {
             return;
         }
 
-        res->some.share_time.value10 = strtod(procfile_lineword(ff, 0, 2), NULL);
-        res->some.share_time.value60 = strtod(procfile_lineword(ff, 0, 4), NULL);
-        res->some.share_time.value300 = strtod(procfile_lineword(ff, 0, 6), NULL);
-        res->some.total_time.value_total = str2ull(procfile_lineword(ff, 0, 8), NULL) / 1000; // us->ms
+        bool did_some = false, did_full = false;
 
-        if (lines > 2) {
-            res->full.share_time.value10 = strtod(procfile_lineword(ff, 1, 2), NULL);
-            res->full.share_time.value60 = strtod(procfile_lineword(ff, 1, 4), NULL);
-            res->full.share_time.value300 = strtod(procfile_lineword(ff, 1, 6), NULL);
-            res->full.total_time.value_total = str2ull(procfile_lineword(ff, 1, 8), NULL) / 1000; // us->ms
+        for(size_t l = 0; l < lines ;l++) {
+            const char *key = procfile_lineword(ff, l, 0);
+            if(strcmp(key, "some") == 0) {
+                res->some.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
+                res->some.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
+                res->some.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
+                res->some.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
+                did_some = true;
+            }
+            else if(strcmp(key, "full") == 0) {
+                res->full.share_time.value10 = strtod(procfile_lineword(ff, l, 2), NULL);
+                res->full.share_time.value60 = strtod(procfile_lineword(ff, l, 4), NULL);
+                res->full.share_time.value300 = strtod(procfile_lineword(ff, l, 6), NULL);
+                res->full.total_time.value_total = str2ull(procfile_lineword(ff, l, 8), NULL) / 1000; // us->ms
+                did_full = true;
+            }
         }
 
-        res->updated = 1;
+        res->updated = (did_full || did_some) ? 1 : 0;
 
-        if (unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO)) {
-            res->some.enabled = CONFIG_BOOLEAN_YES;
-            if (lines > 2) {
-                res->full.enabled = CONFIG_BOOLEAN_YES;
-            } else {
-                res->full.enabled = CONFIG_BOOLEAN_NO;
-            }
-        }
+        if(unlikely(res->some.enabled == CONFIG_BOOLEAN_AUTO))
+            res->some.enabled = (did_some) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
+
+        if(unlikely(res->full.enabled == CONFIG_BOOLEAN_AUTO))
+            res->full.enabled = (did_full) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
     }
 }
 
@@ -1637,6 +1645,7 @@ static inline void read_cgroup(struct cgroup *cg) {
         cgroup2_read_pressure(&cg->cpu_pressure);
         cgroup2_read_pressure(&cg->io_pressure);
         cgroup2_read_pressure(&cg->memory_pressure);
+        cgroup2_read_pressure(&cg->irq_pressure);
         cgroup_read_memory(&cg->memory, 1);
     }
 }
@@ -1851,6 +1860,7 @@ static inline void cgroup_free(struct cgroup *cg) {
     free_pressure(&cg->cpu_pressure);
     free_pressure(&cg->io_pressure);
     free_pressure(&cg->memory_pressure);
+    free_pressure(&cg->irq_pressure);
 
     freez(cg->id);
     freez(cg->intermediate_id);
@@ -2465,6 +2475,18 @@ static inline void discovery_update_filenames() {
                     netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
                 }
             }
+
+            if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
+                snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
+                if (likely(stat(filename, &buf) != -1)) {
+                    cg->irq_pressure.filename = strdupz(filename);
+                    cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
+                    cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
+                    netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename);
+                } else {
+                    netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
+                }
+            }
         }
     }
 }
@@ -4643,6 +4665,112 @@ void update_cgroup_charts(int update_every) {
                 update_pressure_charts(pcs);
             }
 
+            res = &cg->irq_pressure;
+
+            if (likely(res->updated && res->some.enabled)) {
+                struct pressure_charts *pcs;
+                pcs = &res->some;
+
+                if (unlikely(!pcs->share_time.st)) {
+                    RRDSET *chart;
+                    snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure");
+                    chart = pcs->share_time.st = rrdset_create_localhost(
+                            cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+                    , "irq_some_pressure"
+                    , NULL
+                    , "interrupts"
+                    , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"
+                    , title
+                    , "percentage"
+                    , PLUGIN_CGROUPS_NAME
+                    , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+                    , cgroup_containers_chart_priority + 2310
+                    , update_every
+                    , RRDSET_TYPE_LINE
+                    );
+                    rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
+                    pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                    pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                    pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                }
+
+                if (unlikely(!pcs->total_time.st)) {
+                    RRDSET *chart;
+                    snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time");
+                    chart = pcs->total_time.st = rrdset_create_localhost(
+                            cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+                    , "irq_some_pressure_stall_time"
+                    , NULL
+                    , "interrupts"
+                    , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"
+                    , title
+                    , "ms"
+                    , PLUGIN_CGROUPS_NAME
+                    , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+                    , cgroup_containers_chart_priority + 2330
+                    , update_every
+                    , RRDSET_TYPE_LINE
+                    );
+                    rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
+                    pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+                }
+
+                update_pressure_charts(pcs);
+            }
+
+            if (likely(res->updated && res->full.enabled)) {
+                struct pressure_charts *pcs;
+                pcs = &res->full;
+
+                if (unlikely(!pcs->share_time.st)) {
+                    RRDSET *chart;
+                    snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure");
+
+                    chart = pcs->share_time.st = rrdset_create_localhost(
+                            cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+                    , "irq_full_pressure"
+                    , NULL
+                    , "interrupts"
+                    , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"
+                    , title
+                    , "percentage"
+                    , PLUGIN_CGROUPS_NAME
+                    , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+                    , cgroup_containers_chart_priority + 2350
+                    , update_every
+                    , RRDSET_TYPE_LINE
+                    );
+
+                    rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
+                    pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                    pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                    pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+                }
+
+                if (unlikely(!pcs->total_time.st)) {
+                    RRDSET *chart;
+                    snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time");
+                    chart = pcs->total_time.st = rrdset_create_localhost(
+                            cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
+                    , "irq_full_pressure_stall_time"
+                    , NULL
+                    , "interrupts"
+                    , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"
+                    , title
+                    , "ms"
+                    , PLUGIN_CGROUPS_NAME
+                    , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
+                    , cgroup_containers_chart_priority + 2370
+                    , update_every
+                    , RRDSET_TYPE_LINE
+                    );
+                    rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
+                    pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+                }
+
+                update_pressure_charts(pcs);
+            }
+
             res = &cg->io_pressure;
 
             if (likely(res->updated && res->some.enabled)) {
diff --git a/collectors/debugfs.plugin/debugfs_zswap.c b/collectors/debugfs.plugin/debugfs_zswap.c
index c8fc0f030a..502a04f1f8 100644
--- a/collectors/debugfs.plugin/debugfs_zswap.c
+++ b/collectors/debugfs.plugin/debugfs_zswap.c
@@ -38,7 +38,7 @@ static struct netdata_zswap_metric zswap_calculated_metrics[] = {
      .charttype = RRDSET_TYPE_LINE,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_COMPRESS_RATIO,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_COMPRESS_RATIO,
      .divisor = 100,
      .convertv = NULL,
      .value = -1},
@@ -71,7 +71,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_AREA,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_TOT_SIZE,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_TOT_SIZE,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -84,7 +84,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_AREA,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_STORED_PAGE,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_STORED_PAGE,
      .divisor = 1,
      .convertv = pages_to_bytes,
      .value = -1},
@@ -97,7 +97,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_LINE,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_POOL_LIM_HIT,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_POOL_LIM_HIT,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -110,7 +110,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_AREA,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_WRT_BACK_PAGES,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_WRT_BACK_PAGES,
      .divisor = 1,
      .convertv = pages_to_bytes,
      .value = -1},
@@ -123,7 +123,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_AREA,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_SAME_FILL_PAGE,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_SAME_FILL_PAGE,
      .divisor = 1,
      .convertv = pages_to_bytes,
      .value = -1},
@@ -136,7 +136,7 @@ static struct netdata_zswap_metric zswap_independent_metrics[] = {
      .charttype = RRDSET_TYPE_LINE,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_DUPP_ENTRY,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_DUPP_ENTRY,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -175,7 +175,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
      .charttype = RRDSET_TYPE_STACKED,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -188,7 +188,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
      .charttype = RRDSET_TYPE_STACKED,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -201,7 +201,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
      .charttype = RRDSET_TYPE_STACKED,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -214,7 +214,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
      .charttype = RRDSET_TYPE_STACKED,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -227,7 +227,7 @@ static struct netdata_zswap_metric zswap_rejected_metrics[] = {
      .charttype = RRDSET_TYPE_STACKED,
      .enabled = CONFIG_BOOLEAN_YES,
      .chart_created = CONFIG_BOOLEAN_NO,
-     .prio = NETDATA_CHART_PRIO_SYSTEM_ZSWAP_REJECTS,
+     .prio = NETDATA_CHART_PRIO_MEM_ZSWAP_REJECTS,
      .divisor = 1,
      .convertv = NULL,
      .value = -1},
@@ -266,7 +266,7 @@ zswap_send_chart(struct netdata_zswap_metric *metric, int update_every, const ch
 {
     fprintf(
         stdout,
-        "CHART system.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
+        "CHART mem.zswap_%s '' '%s' '%s' 'zswap' '' '%s' %d %d '%s' 'debugfs.plugin' '%s'\n",
         metric->chart_id,
         metric->title,
         metric->units,
@@ -291,7 +291,7 @@ static void zswap_send_dimension(struct netdata_zswap_metric *metric)
 
 static void zswap_send_begin(struct netdata_zswap_metric *metric)
 {
-    fprintf(stdout, "BEGIN system.zswap_%s\n", metric->chart_id);
+    fprintf(stdout, "BEGIN mem.zswap_%s\n", metric->chart_id);
 }
 
 static void zswap_send_set(struct netdata_zswap_metric *metric)
diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c
index ff74ee842e..7b1dad5ecb 100644
--- a/collectors/ebpf.plugin/ebpf_swap.c
+++ b/collectors/ebpf.plugin/ebpf_swap.c
@@ -322,13 +322,13 @@ void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em)
  */
 static void ebpf_obsolete_swap_global(ebpf_module_t *em)
 {
-    ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
+    ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
                               NETDATA_MEM_SWAP_CHART,
                               "Calls to access swap memory",
                               EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
                               NETDATA_EBPF_CHART_TYPE_LINE,
                               NULL,
-                              NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
+                              NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
                               em->update_every);
 }
 
@@ -914,12 +914,12 @@ static void ebpf_swap_allocate_global_vectors(int apps)
  */
 static void ebpf_create_swap_charts(int update_every)
 {
-    ebpf_create_chart(NETDATA_EBPF_SYSTEM_GROUP, NETDATA_MEM_SWAP_CHART,
+    ebpf_create_chart(NETDATA_EBPF_MEMORY_GROUP, NETDATA_MEM_SWAP_CHART,
                       "Calls to access swap memory",
                       EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_SWAP_SUBMENU,
                       NULL,
                       NETDATA_EBPF_CHART_TYPE_LINE,
-                      NETDATA_CHART_PRIO_SYSTEM_SWAP_CALLS,
+                      NETDATA_CHART_PRIO_MEM_SWAP_CALLS,
                       ebpf_create_global_dimension,
                       swap_publish_aggregated, NETDATA_SWAP_END,
                       update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
diff --git a/collectors/freebsd.plugin/freebsd_sysctl.c b/collectors/freebsd.plugin/freebsd_sysctl.c
index c8aa5dad54..84289aafa8 100644
--- a/collectors/freebsd.plugin/freebsd_sysctl.c
+++ b/collectors/freebsd.plugin/freebsd_sysctl.c
@@ -1035,7 +1035,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
 
         if (unlikely(!st)) {
             st = rrdset_create_localhost(
-                    "system",
+                    "mem",
                     "swapio",
                     NULL,
                     "swap",
@@ -1044,7 +1044,7 @@ int do_vm_stats_sys_v_swappgs(int update_every, usec_t dt) {
                     "KiB/s",
                     "freebsd.plugin",
                     "vm.stats.vm.v_swappgs",
-                    NETDATA_CHART_PRIO_SYSTEM_SWAPIO,
+                    NETDATA_CHART_PRIO_MEM_SWAPIO,
                     update_every,
                     RRDSET_TYPE_AREA
             );
diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/multi_metadata.yaml
index 04e66ddfe6..bdbce89f98 100644
--- a/collectors/proc.plugin/multi_metadata.yaml
+++ b/collectors/proc.plugin/multi_metadata.yaml
@@ -20,27 +20,50 @@ modules:
       most_popular: false
     overview:
       data_collection:
-        metrics_description: "This integration provides a collection of statistics about the system such as CPU utilization, process counts and more."
+        metrics_description: |
+          CPU utilization, states, and frequencies, along with key Linux system performance metrics.
+          
+          The `/proc/stat` file provides various types of system statistics:
+          
+          - The overall system CPU usage statistics
+          - Per CPU core statistics
+          - The total context switching of the system
+          - The total number of processes running
+          - The total CPU interrupts
+          - The total CPU softirqs
+          
+          The collector also reads:
+          
+          - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
+          - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
+          - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
+          - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
+          - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
+          - `/sys/devices/system/cpu/[X]/cpuidle/state[Y]/name` to get the names of the idle states for each CPU core in a Linux system.
+          - `/sys/devices/system/cpu/[X]/cpuidle/state[Y]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
         method_description: ""
       supported_platforms:
-        include: []
+        include: [ "linux" ]
         exclude: []
-      multi_instance: true
+      multi_instance: false
       additional_permissions:
         description: ""
       default_behavior:
         auto_detection:
-          description: ""
+          description: |
+            The collector auto-detects all metrics. No configuration is needed.
         limits:
           description: ""
         performance_impact:
-          description: ""
+          description: |
+            The collector disables cpu frequency and idle state monitoring when there are more than 128 CPU cores available.
     setup:
       prerequisites:
         list: []
       configuration:
         file:
-          name: ""
+          section_name: "plugin:proc:/proc/stat"
+          name: "netdata.conf"
           description: ""
         options:
           description: ""
@@ -187,12 +210,43 @@ modules:
       most_popular: false
     overview:
       data_collection:
-        metrics_description: ""
+        metrics_description: |
+          Entropy, a measure of the randomness or unpredictability of data.
+          
+          In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
+          secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
+          vulnerable to attacks that exploit the predictability of the generated keys.
+          
+          In most operating systems, entropy is generated by collecting random events from various sources, such as
+          hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
+          of entropy, which is then used to generate random numbers when needed.
+          
+          The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
+          to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
+          which blocks until enough entropy is available to generate the requested numbers. This ensures that the
+          generated numbers are truly random and not predictable.
+          
+          However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
+          programs that rely on random numbers to slow down or even freeze. This is especially problematic for
+          cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
+          
+          To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
+          entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
+          radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
+          software-based sources.
+          
+          One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
+          for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
+          high-quality entropy, which can be used to seed the pool of entropy in the operating system.
+          
+          Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
+          exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
+          can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
         method_description: ""
       supported_platforms:
-        include: []
+        include: [ "linux" ]
         exclude: []
-      multi_instance: true
+      multi_instance: false
       additional_permissions:
         description: ""
       default_behavior:
@@ -264,12 +318,23 @@ modules:
       most_popular: false
     overview:
       data_collection:
-        metrics_description: "This integration provides the amount of time the system has been up (running)."
+        metrics_description: |
+          The amount of time the system has been up (running).
+          
+          Uptime is a critical aspect of overall system performance:
+          
+          - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
+          - **Performance monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
+          - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
+          - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
+          - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
+          - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
+          - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
         method_description: ""
       supported_platforms:
-        include: []
+        include: [ "linux" ]
         exclude: []
-      multi_instance: true
+      multi_instance: false
       additional_permissions:
         description: ""
       default_behavior:
@@ -340,12 +405,33 @@ modules:
       most_popular: false
     overview:
       data_collection:
-        metrics_description: "This integration provides information about process, memory, swap space utilization and many more."
+        metrics_description: |
+          Linux Virtual memory subsystem.
+          
+          Information about memory management, indicating how effectively the kernel allocates and frees
+          memory resources in response to system demands.
+          
+          Monitors page faults, which occur when a process requests a portion of its memory that isn't
+          immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
+          provide insights into application behavior.
+          
+          Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
+          swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
+          a compressed cache for swap pages, and provides insights into its usage and performance implications.
+          
+          In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
+          memory resources between host and guest systems.
+          
+          For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
+          can impact the performance based on the memory access times.
+
+          The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
+          of memory resources.
         method_description: ""
       supported_platforms:
-        include: []
+        include: [ "linux" ]
         exclude: []
-      multi_instance: true
+      multi_instance: false
       additional_permissions:
         description: ""
       default_behavior:
@@ -535,7 +621,37 @@ modules:
       most_popular: false
     overview:
       data_collection:
-        metrics_description: "Monitor Interrupts metrics for efficient processor interrupt handling."
+        metrics_description: |
+          Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
+          The numbers reported are the counts of interrupts of each type that have occurred.
+
+          An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
+          immediate attention. The processor then interrupts its current activities and executes the interrupt handler
+          to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
+          
+          The types of interrupts include:
+
+          - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
+            you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
+
+          - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. They are primarily
+            used to switch the CPU among different tasks.
+
+          - **Software interrupts**: These are generated by a program requiring disk I/O operations or other system resources.
+
+          - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
+          
+          Monitoring `/proc/interrupts` can be used for:
+
+          - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
+            configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
+            performance degradation.
+
+          - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
+
+          - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
+            understand what your system is doing. It can provide insights into the system's interaction with hardware,
+            drivers, and other parts of the kernel.
         method_description: ""
       supported_platforms:
author	Costa Tsaousis <costa@netdata.cloud>	2023-07-26 01:06:57 +0300
committer	GitHub <noreply@github.com>	2023-07-26 01:06:57 +0300
commit	065091c3f5d908c5e06ed5ac817999f7dc965b90 (patch)
tree	70e29c0d74eaf33f69e9e393732d3d051fd3da5d /collectors
parent	accc426c8ab08a75c9e6470a4a89482fb4c4eb17 (diff)