summary refs log tree commit diff stats
path: root/collectors
diff options
context:
space:
mode:
author Costa Tsaousis <costa@netdata.cloud> 2023-05-03 16:35:26 +0300
committer GitHub <noreply@github.com> 2023-05-03 16:35:26 +0300
commit 50183dc5b1022c94b99fd30b7c9aad5e7e6059fe (patch)
tree 20b467388c40f85a62a700827a4ffc3e3b05c9c0 /collectors
parent 06a2b68b92b4f7fe28ef093d57b5bce6368f5b08 (diff)
add support for monitoring thp, ballooning, zswap, ksm cow (#15000)
* add support for monitoring thp, ballooning, zswap, ksm cow
* update proc metrics.csv
* updated metrics.csv
* Revert "updated metrics.csv"
  This reverts commit 54b9464be7c0d805220bbcb2144b4b4d7780a9c5.
* replaced prog.plugin with proc.plugin
---------
Co-authored-by: ilyam8 <ilya@netdata.cloud>
Diffstat (limited to 'collectors')
-rw-r--r-- collectors/all.h 10
-rw-r--r-- collectors/proc.plugin/metrics.csv 128
-rw-r--r-- collectors/proc.plugin/proc_vmstat.c 503
3 files changed, 580 insertions, 61 deletions
diff --git a/collectors/all.h b/collectors/all.h
index 621285c40f..a0ce5d7fc4 100644
--- a/collectors/all.h
+++ b/collectors/all.h
@@ -25,6 +25,7 @@
#define NETDATA_CHART_PRIO_SYSTEM_RAM 200
#define NETDATA_CHART_PRIO_SYSTEM_SWAP 201
#define NETDATA_CHART_PRIO_SYSTEM_SWAPIO 250
+#define NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO 300
#define NETDATA_CHART_PRIO_SYSTEM_NET 500
#define NETDATA_CHART_PRIO_SYSTEM_IPV4 500 // freebsd only
#define NETDATA_CHART_PRIO_SYSTEM_IP 501
@@ -80,9 +81,18 @@
#define NETDATA_CHART_PRIO_MEM_KERNEL 1100
#define NETDATA_CHART_PRIO_MEM_SLAB 1200
#define NETDATA_CHART_PRIO_MEM_HUGEPAGES 1250
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS 1251
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE 1252
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO 1253
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_KHUGEPAGED 1254
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SPLITS 1255
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_SWAPOUT 1256
+#define NETDATA_CHART_PRIO_MEM_HUGEPAGES_COMPACT 1257
#define NETDATA_CHART_PRIO_MEM_KSM 1300
#define NETDATA_CHART_PRIO_MEM_KSM_SAVINGS 1301
#define NETDATA_CHART_PRIO_MEM_KSM_RATIOS 1302
+#define NETDATA_CHART_PRIO_MEM_KSM_COW 1303
+#define NETDATA_CHART_PRIO_MEM_BALLOON 1350
#define NETDATA_CHART_PRIO_MEM_NUMA 1400
#define NETDATA_CHART_PRIO_MEM_NUMA_NODES 1410
#define NETDATA_CHART_PRIO_MEM_PAGEFRAG 1450
diff --git a/collectors/proc.plugin/metrics.csv b/collectors/proc.plugin/metrics.csv
index 31c4f5ab0a..ea0d1b3646 100644
--- a/collectors/proc.plugin/metrics.csv
+++ b/collectors/proc.plugin/metrics.csv
@@ -1,19 +1,19 @@
metric,scope,dimensions,unit,description,chart_type,labels,plugin,module
-system.cpu,,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Total CPU utilization,stacked,,prog.plugin,/proc/stat
-cpu.cpu,cpu core,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Core utilization,stacked,cpu,prog.plugin,/proc/stat
-system.intr,,interrupts,interrupts/s,CPU Interrupts,line,,prog.plugin,/proc/stat
-system.ctxt,,switches,context switches/s,CPU Context Switches,line,,prog.plugin,/proc/stat
-system.forks,,started,processes/s,Started Processes,line,,prog.plugin,/proc/stat
-system.processes,,"running, blocked",processes,System Processes,line,,prog.plugin,/proc/stat
-cpu.core_throttling,,a dimension per cpu core,events/s,Core Thermal Throttling Events,line,,prog.plugin,/proc/stat
-cpu.package_throttling,,a dimension per package,events/s,Package Thermal Throttling Events,line,,prog.plugin,/proc/stat
-cpu.cpufreq,,a dimension per cpu core,MHz,Current CPU Frequency,line,,prog.plugin,/proc/stat
-cpuidle.cpu_cstate_residency_time,cpu core,a dimension per c-state,percentage,C-state residency time,stacked,cpu,prog.plugin,/proc/stat
-system.entropy,,entropy,entropy,Available Entropy,line,,prog.plugin,/proc/sys/kernel/random/entropy_avail
-system.uptime,,uptime,seconds,System Uptime,line,,prog.plugin,/proc/uptime
-system.swapio,,"in, out",KiB/s,Swap I/O,area,,prog.plugin,/proc/vmstat
-system.pgpgio,,"in, out",KiB/s,Memory Paged from/to disk,area,,prog.plugin,/proc/vmstat
-system.pgfaults,,"minor, major",faults/s,Memory Page Faults,line,,prog.plugin,/proc/vmstat
+system.cpu,,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Total CPU utilization,stacked,,proc.plugin,/proc/stat
+cpu.cpu,cpu core,"guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle",percentage,Core utilization,stacked,cpu,proc.plugin,/proc/stat
+system.intr,,interrupts,interrupts/s,CPU Interrupts,line,,proc.plugin,/proc/stat
+system.ctxt,,switches,context switches/s,CPU Context Switches,line,,proc.plugin,/proc/stat
+system.forks,,started,processes/s,Started Processes,line,,proc.plugin,/proc/stat
+system.processes,,"running, blocked",processes,System Processes,line,,proc.plugin,/proc/stat
+cpu.core_throttling,,a dimension per cpu core,events/s,Core Thermal Throttling Events,line,,proc.plugin,/proc/stat
+cpu.package_throttling,,a dimension per package,events/s,Package Thermal Throttling Events,line,,proc.plugin,/proc/stat
+cpu.cpufreq,,a dimension per cpu core,MHz,Current CPU Frequency,line,,proc.plugin,/proc/stat
+cpuidle.cpu_cstate_residency_time,cpu core,a dimension per c-state,percentage,C-state residency time,stacked,cpu,proc.plugin,/proc/stat
+system.entropy,,entropy,entropy,Available Entropy,line,,proc.plugin,/proc/sys/kernel/random/entropy_avail
+system.uptime,,uptime,seconds,System Uptime,line,,proc.plugin,/proc/uptime
+system.swapio,,"in, out",KiB/s,Swap I/O,area,,proc.plugin,/proc/vmstat
+system.pgpgio,,"in, out",KiB/s,Memory Paged from/to disk,area,,proc.plugin,/proc/vmstat
+system.pgfaults,,"minor, major",faults/s,Memory Page Faults,line,,proc.plugin,/proc/vmstat
system.interrupts,,a dimension per device,interrupts/s,System interrupts,stacked,,proc.plugin,/proc/interrupts
cpu.interrupts,cpu core,a dimension per device,interrupts/s,CPU interrupts,stacked,cpu,proc.plugin,/proc/interrupts
system.load,,"load1, load5, load15",load,System Load Average,line,,proc.plugin,/proc/loadavg
@@ -44,52 +44,62 @@ mem.kernel,,"Slab, KernelStack, PageTables, VmallocUsed, Percpu",MiB,Memory Used
mem.slab,,"reclaimable, unreclaimable",MiB,Reclaimable Kernel Memory,stacked,,proc.plugin,/proc/meminfo
mem.hugepage,,"free, used, surplus, reserved",MiB,Dedicated HugePages Memory,stacked,,proc.plugin,/proc/meminfo
mem.transparent_hugepages,,"anonymous, shmem",MiB,Transparent HugePages Memory,stacked,,proc.plugin,/proc/meminfo
+mem.balloon,,"inflate, deflate, migrate",KiB/s,Memory Ballooning Operations,line,,proc.plugin,/proc/vmstat
+system.zswapio,,"in, out",KiB/s,ZSwap I/O,area,,proc.plugin,/proc/vmstat
+mem.ksm_cow,,"swapin, write",KiB/s,KSM Copy On Write Operations,line,,proc.plugin,/proc/vmstat
+mem.thp_faults,,"alloc, fallback, fallback_charge",events/s,Transparent Huge Page Fault Allocations,line,,proc.plugin,/proc/vmstat
+mem.thp_file,,"alloc, fallback, mapped, fallback_charge",events/s,Transparent Huge Page File Allocations,line,,proc.plugin,/proc/vmstat
+mem.thp_zero,,"alloc, failed",events/s,Transparent Huge Zero Page Allocations,line,,proc.plugin,/proc/vmstat
+mem.thp_collapse,,"alloc, failed",events/s,Transparent Huge Pages Collapsed by khugepaged,line,,proc.plugin,/proc/vmstat
+mem.thp_split,,"split, failed, split_pmd, split_deferred",events/s,Transparent Huge Page Splits,line,,proc.plugin,/proc/vmstat
+mem.thp_swapout,,"swapout, fallback",events/s,Transparent Huge Pages Swap Out,line,,proc.plugin,/proc/vmstat
+mem.thp_compact,,"success, fail, stall",events/s,Transparent Huge Pages Compaction,line,,proc.plugin,/proc/vmstat
mem.pagetype_global,,a dimension per pagesize,B,System orders available,stacked,,proc.plugin,/proc/pagetypeinfo
mem.pagetype,"node, zone, type",a dimension per pagesize,B,"pagetype_Node{node}_{zone}_{type}",stacked,"node_id, node_zone, node_type",proc.plugin,/proc/pagetypeinfo
-mem.oom_kill,,kills,kills/s,Out of Memory Kills,line,,prog.plugin,/proc/vmstat
-mem.numa,,"local, foreign, interleave, other, pte_updates, huge_pte_updates, hint_faults, hint_faults_local, pages_migrated",events/s,NUMA events,line,,prog.plugin,/proc/vmstat
-mem.ecc_ce,,a dimension per mem controller,errors,ECC Memory Correctable Errors,line,,prog.plugin,/sys/devices/system/edac/mc
-mem.ecc_ue,,a dimension per mem controller,errors,ECC Memory Uncorrectable Errors,line,,prog.plugin,/sys/devices/system/edac/mc
-mem.numa_nodes,numa node,"hit, miss, local, foreign, interleave, other",events/s,NUMA events,line,numa_node,prog.plugin,/sys/devices/system/node
-mem.ksm,,"shared, unshared, sharing, volatile",MiB,Kernel Same Page Merging,stacked,,prog.plugin,/sys/kernel/mm/ksm
-mem.ksm_savings,,"savings, offered",MiB,Kernel Same Page Merging Savings,area,,prog.plugin,/sys/kernel/mm/ksm
-mem.ksm_ratios,,savings,percentage,Kernel Same Page Merging Effectiveness,line,,prog.plugin,/sys/kernel/mm/ksm
-mem.zram_usage,zram device,"compressed, metadata",MiB,ZRAM Memory Usage,area,device,prog.plugin,/sys/block/zram
-mem.zram_savings,zram device,"savings, original",MiB,ZRAM Memory Savings,area,device,prog.plugin,/sys/block/zram
-mem.zram_ratio,zram device,ratio,ratio,ZRAM Compression Ratio (original to compressed),line,device,prog.plugin,/sys/block/zram
-mem.zram_efficiency,zram device,percent,percentage,ZRAM Efficiency,line,device,prog.plugin,/sys/block/zram
-system.ipc_semaphores,,semaphores,semaphores,IPC Semaphores,area,,prog.plugin,ipc
-system.ipc_semaphore_arrays,,arrays,arrays,IPC Semaphore Arrays,area,,prog.plugin,ipc
-system.message_queue_message,,a dimension per queue,messages,IPC Message Queue Number of Messages,stacked,,prog.plugin,ipc
-system.message_queue_bytes,,a dimension per queue,bytes,IPC Message Queue Used Bytes,stacked,,prog.plugin,ipc
-system.shared_memory_segments,,segments,segments,IPC Shared Memory Number of Segments,stacked,,prog.plugin,ipc
-system.shared_memory_bytes,,bytes,bytes,IPC Shared Memory Used Bytes,stacked,,prog.plugin,ipc
-system.io,,"in, out",KiB/s,Disk I/O,area,,prog.plugin,/proc/diskstats
-disk.io,disk,"reads, writes",KiB/s,Disk I/O Bandwidth,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.io,disk,discards,KiB/s,Amount of Discarded Data,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.ops,disk,"reads, writes",operations/s,Disk Completed I/O Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.ops,disk,"discards, flushes",operations/s,Disk Completed Extended I/O Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.qops,disk,operations,operations,Disk Current I/O Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.backlog,disk,backlog,milliseconds,Disk Backlog,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.busy,disk,busy,milliseconds,Disk Busy Time,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.util,disk,utilization,% of time working,Disk Utilization Time,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.mops,disk,"reads, writes",merged operations/s,Disk Merged Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.mops,disk,discards,merged operations/s,Disk Merged Discard Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.iotime,disk,"reads, writes",milliseconds/s,Disk Total I/O Time,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.iotime,disk,"discards, flushes",milliseconds/s,Disk Total I/O Time for Extended Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.await,disk,"reads, writes",milliseconds/operation,Average Completed I/O Operation Time,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.await,disk,"discards, flushes",milliseconds/operation,Average Completed Extended I/O Operation Time,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.avgsz,disk,"reads, writes",KiB/operation,Average Completed I/O Operation Bandwidth,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk_ext.avgsz,disk,discards,KiB/operation,Average Amount of Discarded Data,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.svctm,disk,svctm,milliseconds/operation,Average Service Time,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_cache_alloc,disk,"ununsed, dirty, clean, metadata, undefined",percentage,BCache Cache Allocations,stacked,,prog.plugin,/proc/diskstats
-disk.bcache_hit_ratio,disk,"5min, 1hour, 1day, ever",percentage,BCache Cache Hit Ratio,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_rates,disk,"congested, writeback",KiB/s,BCache Rates,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_size,disk,dirty,MiB,BCache Cache Sizes,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_usage,disk,avail,percentage,BCache Cache Usage,area,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_cache_read_races,disk,"races, errors",operations/s,BCache Cache Read Races,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache,disk,"hits, misses, collisions, readaheads",operations/s,BCache Cache I/O Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
-disk.bcache_bypass,disk,"hits, misses",operations/s,BCache Cache Bypass I/O Operations,line,"device, mount_point, device_type",prog.plugin,/proc/diskstats
+mem.oom_kill,,kills,kills/s,Out of Memory Kills,line,,proc.plugin,/proc/vmstat
+mem.numa,,"local, foreign, interleave, other, pte_updates, huge_pte_updates, hint_faults, hint_faults_local, pages_migrated",events/s,NUMA events,line,,proc.plugin,/proc/vmstat
+mem.ecc_ce,,a dimension per mem controller,errors,ECC Memory Correctable Errors,line,,proc.plugin,/sys/devices/system/edac/mc
+mem.ecc_ue,,a dimension per mem controller,errors,ECC Memory Uncorrectable Errors,line,,proc.plugin,/sys/devices/system/edac/mc
+mem.numa_nodes,numa node,"hit, miss, local, foreign, interleave, other",events/s,NUMA events,line,numa_node,proc.plugin,/sys/devices/system/node
+mem.ksm,,"shared, unshared, sharing, volatile",MiB,Kernel Same Page Merging,stacked,,proc.plugin,/sys/kernel/mm/ksm
+mem.ksm_savings,,"savings, offered",MiB,Kernel Same Page Merging Savings,area,,proc.plugin,/sys/kernel/mm/ksm
+mem.ksm_ratios,,savings,percentage,Kernel Same Page Merging Effectiveness,line,,proc.plugin,/sys/kernel/mm/ksm
+mem.zram_usage,zram device,"compressed, metadata",MiB,ZRAM Memory Usage,area,device,proc.plugin,/sys/block/zram
+mem.zram_savings,zram device,"savings, original",MiB,ZRAM Memory Savings,area,device,proc.plugin,/sys/block/zram
+mem.zram_ratio,zram device,ratio,ratio,ZRAM Compression Ratio (original to compressed),line,device,proc.plugin,/sys/block/zram
+mem.zram_efficiency,zram device,percent,percentage,ZRAM Efficiency,line,device,proc.plugin,/sys/block/zram
+system.ipc_semaphores,,semaphores,semaphores,IPC Semaphores,area,,proc.plugin,ipc
+system.ipc_semaphore_arrays,,arrays,arrays,IPC Semaphore Arrays,area,,proc.plugin,ipc
+system.message_queue_message,,a dimension per queue,messages,IPC Message Queue Number of Messages,stacked,,proc.plugin,ipc
+system.message_queue_bytes,,a dimension per queue,bytes,IPC Message Queue Used Bytes,stacked,,proc.plugin,ipc
+system.shared_memory_segments,,segments,segments,IPC Shared Memory Number of Segments,stacked,,proc.plugin,ipc
+system.shared_memory_bytes,,bytes,bytes,IPC Shared Memory Used Bytes,stacked,,proc.plugin,ipc
+system.io,,"in, out",KiB/s,Disk I/O,area,,proc.plugin,/proc/diskstats
+disk.io,disk,"reads, writes",KiB/s,Disk I/O Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.io,disk,discards,KiB/s,Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.ops,disk,"reads, writes",operations/s,Disk Completed I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.ops,disk,"discards, flushes",operations/s,Disk Completed Extended I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.qops,disk,operations,operations,Disk Current I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.backlog,disk,backlog,milliseconds,Disk Backlog,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.busy,disk,busy,milliseconds,Disk Busy Time,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.util,disk,utilization,% of time working,Disk Utilization Time,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.mops,disk,"reads, writes",merged operations/s,Disk Merged Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.mops,disk,discards,merged operations/s,Disk Merged Discard Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.iotime,disk,"reads, writes",milliseconds/s,Disk Total I/O Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.iotime,disk,"discards, flushes",milliseconds/s,Disk Total I/O Time for Extended Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.await,disk,"reads, writes",milliseconds/operation,Average Completed I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.await,disk,"discards, flushes",milliseconds/operation,Average Completed Extended I/O Operation Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.avgsz,disk,"reads, writes",KiB/operation,Average Completed I/O Operation Bandwidth,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk_ext.avgsz,disk,discards,KiB/operation,Average Amount of Discarded Data,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.svctm,disk,svctm,milliseconds/operation,Average Service Time,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_cache_alloc,disk,"ununsed, dirty, clean, metadata, undefined",percentage,BCache Cache Allocations,stacked,,proc.plugin,/proc/diskstats
+disk.bcache_hit_ratio,disk,"5min, 1hour, 1day, ever",percentage,BCache Cache Hit Ratio,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_rates,disk,"congested, writeback",KiB/s,BCache Rates,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_size,disk,dirty,MiB,BCache Cache Sizes,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_usage,disk,avail,percentage,BCache Cache Usage,area,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_cache_read_races,disk,"races, errors",operations/s,BCache Cache Read Races,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache,disk,"hits, misses, collisions, readaheads",operations/s,BCache Cache I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
+disk.bcache_bypass,disk,"hits, misses",operations/s,BCache Cache Bypass I/O Operations,line,"device, mount_point, device_type",proc.plugin,/proc/diskstats
md.health,,a dimension per md array,failed disks,Faulty Devices In MD,line,,proc.plugin,/proc/mdstat
md.disks,md array,"inuse, down",disks,Disks Stats,stacked,"device, raid_level",proc.plugin,/proc/mdstat
md.mismatch_cnt,md array,count,unsynchronized blocks,Mismatch Count,line,"device, raid_level",proc.plugin,/proc/mdstat
diff --git a/collectors/proc.plugin/proc_vmstat.c b/collectors/proc.plugin/proc_vmstat.c
index 638d1690c3..ca56e900e2 100644
--- a/collectors/proc.plugin/proc_vmstat.c
+++ b/collectors/proc.plugin/proc_vmstat.c
@@ -10,7 +10,7 @@ int do_proc_vmstat(int update_every, usec_t dt) {
(void)dt;
static procfile *ff = NULL;
- static int do_swapio = -1, do_io = -1, do_pgfaults = -1, do_oom_kill = -1, do_numa = -1;
+ static int do_swapio = -1, do_io = -1, do_pgfaults = -1, do_oom_kill = -1, do_numa = -1, do_thp = -1, do_zswapio = -1, do_balloon = -1, do_ksm = -1;
static int has_numa = -1;
static ARL_BASE *arl_base = NULL;
@@ -31,6 +31,103 @@ int do_proc_vmstat(int update_every, usec_t dt) {
static unsigned long long pswpout = 0ULL;
static unsigned long long oom_kill = 0ULL;
+ // THP page migration
+// static unsigned long long pgmigrate_success = 0ULL;
+// static unsigned long long pgmigrate_fail = 0ULL;
+// static unsigned long long thp_migration_success = 0ULL;
+// static unsigned long long thp_migration_fail = 0ULL;
+// static unsigned long long thp_migration_split = 0ULL;
+
+ // Compaction cost model
+ // https://lore.kernel.org/lkml/20121022080525.GB2198@suse.de/
+// static unsigned long long compact_migrate_scanned = 0ULL;
+// static unsigned long long compact_free_scanned = 0ULL;
+// static unsigned long long compact_isolated = 0ULL;
+
+ // THP defragmentation
+ static unsigned long long compact_stall = 0ULL; // incremented when an application stalls allocating THP
+ static unsigned long long compact_fail = 0ULL; // defragmentation events that failed
+ static unsigned long long compact_success = 0ULL; // defragmentation events that succeeded
+
+ // ?
+// static unsigned long long compact_daemon_wake = 0ULL;
+// static unsigned long long compact_daemon_migrate_scanned = 0ULL;
+// static unsigned long long compact_daemon_free_scanned = 0ULL;
+
+ // ?
+// static unsigned long long htlb_buddy_alloc_success = 0ULL;
+// static unsigned long long htlb_buddy_alloc_fail = 0ULL;
+
+ // ?
+// static unsigned long long cma_alloc_success = 0ULL;
+// static unsigned long long cma_alloc_fail = 0ULL;
+
+ // ?
+// static unsigned long long unevictable_pgs_culled = 0ULL;
+// static unsigned long long unevictable_pgs_scanned = 0ULL;
+// static unsigned long long unevictable_pgs_rescued = 0ULL;
+// static unsigned long long unevictable_pgs_mlocked = 0ULL;
+// static unsigned long long unevictable_pgs_munlocked = 0ULL;
+// static unsigned long long unevictable_pgs_cleared = 0ULL;
+// static unsigned long long unevictable_pgs_stranded = 0ULL;
+
+ // THP handling of page faults
+ static unsigned long long thp_fault_alloc = 0ULL; // is incremented every time a huge page is successfully allocated to handle a page fault. This applies to both the first time a page is faulted and for COW faults.
+ static unsigned long long thp_fault_fallback = 0ULL; // is incremented if a page fault fails to allocate a huge page and instead falls back to using small pages.
+ static unsigned long long thp_fault_fallback_charge = 0ULL; // is incremented if a page fault fails to charge a huge page and instead falls back to using small pages even though the allocation was successful.
+
+ // khugepaged collapsing of small pages into huge pages
+ static unsigned long long thp_collapse_alloc = 0ULL; // is incremented by khugepaged when it has found a range of pages to collapse into one huge page and has successfully allocated a new huge page to store the data.
+ static unsigned long long thp_collapse_alloc_failed = 0ULL; // is incremented if khugepaged found a range of pages that should be collapsed into one huge page but failed the allocation.
+
+ // THP handling of file allocations
+ static unsigned long long thp_file_alloc = 0ULL; // is incremented every time a file huge page is successfully allocated
+ static unsigned long long thp_file_fallback = 0ULL; // is incremented if a file huge page is attempted to be allocated but fails and instead falls back to using small pages
+ static unsigned long long thp_file_fallback_charge = 0ULL; // is incremented if a file huge page cannot be charged and instead falls back to using small pages even though the allocation was successful
+ static unsigned long long thp_file_mapped = 0ULL; // is incremented every time a file huge page is mapped into user address space
+
+ // THP splitting of huge pages into small pages
+ static unsigned long long thp_split_page = 0ULL;
+ static unsigned long long thp_split_page_failed = 0ULL;
+ static unsigned long long thp_deferred_split_page = 0ULL; // is incremented when a huge page is put onto split queue. This happens when a huge page is partially unmapped and splitting it would free up some memory. Pages on split queue are going to be split under memory pressure
+ static unsigned long long thp_split_pmd = 0ULL; // is incremented every time a PMD is split into a table of PTEs. This can happen, for instance, when an application calls mprotect() or munmap() on part of a huge page. It doesn’t split the huge page, only the page table entry
+
+ // ?
+// static unsigned long long thp_scan_exceed_none_pte = 0ULL;
+// static unsigned long long thp_scan_exceed_swap_pte = 0ULL;
+// static unsigned long long thp_scan_exceed_share_pte = 0ULL;
+// static unsigned long long thp_split_pud = 0ULL;
+
+ // THP Zero Huge Page
+ static unsigned long long thp_zero_page_alloc = 0ULL; // is incremented every time a huge zero page used for thp is successfully allocated. Note, it doesn’t count every map of the huge zero page, only its allocation
+ static unsigned long long thp_zero_page_alloc_failed = 0ULL; // is incremented if kernel fails to allocate huge zero page and falls back to using small pages
+
+ // THP Swap Out
+ static unsigned long long thp_swpout = 0ULL; // is incremented every time a huge page is swapout in one piece without splitting
+ static unsigned long long thp_swpout_fallback = 0ULL; // is incremented if a huge page has to be split before swapout, usually because the kernel failed to allocate contiguous swap space for the huge page
+
+ // memory ballooning
+ // Current size of balloon is (balloon_inflate - balloon_deflate) pages
+ static unsigned long long balloon_inflate = 0ULL;
+ static unsigned long long balloon_deflate = 0ULL;
+ static unsigned long long balloon_migrate = 0ULL;
+
+ // ?
+// static unsigned long long swap_ra = 0ULL;
+// static unsigned long long swap_ra_hit = 0ULL;
+
+ static unsigned long long ksm_swpin_copy = 0ULL; // is incremented every time a KSM page is copied when swapping in
+ static unsigned long long cow_ksm = 0ULL; // is incremented every time a KSM page triggers copy on write (COW): when users try to write to a KSM page, a copy has to be made
+
+ // zswap
+ static unsigned long long zswpin = 0ULL;
+ static unsigned long long zswpout = 0ULL;
+
+ // ?
+// static unsigned long long direct_map_level2_splits = 0ULL;
+// static unsigned long long direct_map_level3_splits = 0ULL;
+// static unsigned long long nr_unstable = 0ULL;
+
if(unlikely(!ff)) {
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/vmstat");
@@ -49,7 +146,10 @@ int do_proc_vmstat(int update_every, usec_t dt) {
do_pgfaults = config_get_boolean("plugin:proc:/proc/vmstat", "memory page faults", CONFIG_BOOLEAN_YES);
do_oom_kill = config_get_boolean("plugin:proc:/proc/vmstat", "out of memory kills", CONFIG_BOOLEAN_AUTO);
do_numa = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "system-wide numa metric summary", CONFIG_BOOLEAN_AUTO);
-
+ do_thp = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "transparent huge pages", CONFIG_BOOLEAN_AUTO);
+ do_zswapio = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "zswap i/o", CONFIG_BOOLEAN_AUTO);
+ do_balloon = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "memory ballooning", CONFIG_BOOLEAN_AUTO);
+ do_ksm = config_get_boolean_ondemand("plugin:proc:/proc/vmstat", "kernel same memory", CONFIG_BOOLEAN_AUTO);
arl_base = arl_create("vmstat", NULL, 60);
arl_expect(arl_base, "pgfault", &pgfault);
@@ -94,6 +194,56 @@ int do_proc_vmstat(int update_every, usec_t dt) {
has_numa = 0;
do_numa = CONFIG_BOOLEAN_NO;
}
+
+ if(do_thp == CONFIG_BOOLEAN_YES || do_thp == CONFIG_BOOLEAN_AUTO) {
+// arl_expect(arl_base, "pgmigrate_success", &pgmigrate_success);
+// arl_expect(arl_base, "pgmigrate_fail", &pgmigrate_fail);
+// arl_expect(arl_base, "thp_migration_success", &thp_migration_success);
+// arl_expect(arl_base, "thp_migration_fail", &thp_migration_fail);
+// arl_expect(arl_base, "thp_migration_split", &thp_migration_split);
+// arl_expect(arl_base, "compact_migrate_scanned", &compact_migrate_scanned);
+// arl_expect(arl_base, "compact_free_scanned", &compact_free_scanned);
+// arl_expect(arl_base, "compact_isolated", &compact_isolated);
+ arl_expect(arl_base, "compact_stall", &compact_stall);
+ arl_expect(arl_base, "compact_fail", &compact_fail);
+ arl_expect(arl_base, "compact_success", &compact_success);
+// arl_expect(arl_base, "compact_daemon_wake", &compact_daemon_wake);
+// arl_expect(arl_base, "compact_daemon_migrate_scanned", &compact_daemon_migrate_scanned);
+// arl_expect(arl_base, "compact_daemon_free_scanned", &compact_daemon_free_scanned);
+ arl_expect(arl_base, "thp_fault_alloc", &thp_fault_alloc);
+ arl_expect(arl_base, "thp_fault_fallback", &thp_fault_fallback);
+ arl_expect(arl_base, "thp_fault_fallback_charge", &thp_fault_fallback_charge);
+ arl_expect(arl_base, "thp_collapse_alloc", &thp_collapse_alloc);
+ arl_expect(arl_base, "thp_collapse_alloc_failed", &thp_collapse_alloc_failed);
+ arl_expect(arl_base, "thp_file_alloc", &thp_file_alloc);
+ arl_expect(arl_base, "thp_file_fallback", &thp_file_fallback);
+ arl_expect(arl_base, "thp_file_fallback_charge", &thp_file_fallback_charge);
+ arl_expect(arl_base, "thp_file_mapped", &thp_file_mapped);
+ arl_expect(arl_base, "thp_split_page", &thp_split_page);
+ arl_expect(arl_base, "thp_split_page_failed", &thp_split_page_failed);
+ arl_expect(arl_base, "thp_deferred_split_page", &thp_deferred_split_page);
+ arl_expect(arl_base, "thp_split_pmd", &thp_split_pmd);
+ arl_expect(arl_base, "thp_zero_page_alloc", &thp_zero_page_alloc);
+ arl_expect(arl_base, "thp_zero_page_alloc_failed", &thp_zero_page_alloc_failed);
+ arl_expect(arl_base, "thp_swpout", &thp_swpout);
+ arl_expect(arl_base, "thp_swpout_fallback", &thp_swpout_fallback);
+ }
+
+ if(do_balloon == CONFIG_BOOLEAN_YES || do_balloon == CONFIG_BOOLEAN_AUTO) {
+ arl_expect(arl_base, "balloon_inflate", &balloon_inflate);
+ arl_expect(arl_base, "balloon_deflate", &balloon_deflate);
+ arl_expect(arl_base, "balloon_migrate", &balloon_migrate);
+ }
+
+ if(do_ksm == CONFIG_BOOLEAN_YES || do_ksm == CONFIG_BOOLEAN_AUTO) {
+ arl_expect(arl_base, "ksm_swpin_copy", &ksm_swpin_copy);
+ arl_expect(arl_base, "cow_ksm", &cow_ksm);
+ }
+
+ if(do_zswapio == CONFIG_BOOLEAN_YES || do_zswapio == CONFIG_BOOLEAN_AUTO) {
+ arl_expect(arl_base, "zswpin", &zswpin);
+ arl_expect(arl_base, "zswpout", &zswpout);
+ }
}
arl_begin(arl_base);
@@ -306,6 +456,355 @@ int do_proc_vmstat(int update_every, usec_t dt) {
rrdset_done(st_numa);
}
+ // --------------------------------------------------------------------
+
+ // Memory ballooning chart: drawn whenever the setting is yes; on auto it is
+ // drawn when zero metrics are enabled or any balloon counter is non-zero.
+ if(do_balloon == CONFIG_BOOLEAN_YES ||
+    (do_balloon == CONFIG_BOOLEAN_AUTO &&
+     (netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES ||
+      balloon_inflate || balloon_deflate || balloon_migrate))) {
+     // promote auto to yes
+     do_balloon = CONFIG_BOOLEAN_YES;
+
+     static RRDSET *chart = NULL;
+     static RRDDIM *dim_inflate = NULL, *dim_deflate = NULL, *dim_migrate = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!chart)) {
+         // dimensions are scaled by page size / 1024 to match the KiB/s units
+         const long page_bytes = sysconf(_SC_PAGESIZE);
+
+         chart = rrdset_create_localhost(
+             "mem", "balloon", NULL, "balloon", NULL,
+             "Memory Ballooning Operations", "KiB/s",
+             PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_VMSTAT_NAME,
+             NETDATA_CHART_PRIO_MEM_BALLOON, update_every, RRDSET_TYPE_LINE);
+
+         // "deflate" is added with a negative multiplier
+         dim_inflate = rrddim_add(chart, "inflate", NULL, page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+         dim_deflate = rrddim_add(chart, "deflate", NULL, -page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+         dim_migrate = rrddim_add(chart, "migrate", NULL, page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     rrddim_set_by_pointer(chart, dim_inflate, balloon_inflate);
+     rrddim_set_by_pointer(chart, dim_deflate, balloon_deflate);
+     rrddim_set_by_pointer(chart, dim_migrate, balloon_migrate);
+
+     rrdset_done(chart);
+ }
+
+ // --------------------------------------------------------------------
+
+ // ZSwap I/O chart: drawn whenever the setting is yes; on auto it is drawn
+ // when zero metrics are enabled or either zswap counter is non-zero.
+ if(do_zswapio == CONFIG_BOOLEAN_YES ||
+    (do_zswapio == CONFIG_BOOLEAN_AUTO &&
+     (netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES || zswpin || zswpout))) {
+     // promote auto to yes
+     do_zswapio = CONFIG_BOOLEAN_YES;
+
+     static RRDSET *chart = NULL;
+     static RRDDIM *dim_in = NULL, *dim_out = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!chart)) {
+         // dimensions are scaled by page size / 1024 to match the KiB/s units
+         const long page_bytes = sysconf(_SC_PAGESIZE);
+
+         chart = rrdset_create_localhost(
+             "system", "zswapio", NULL, "zswap", NULL,
+             "ZSwap I/O", "KiB/s",
+             PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_VMSTAT_NAME,
+             NETDATA_CHART_PRIO_SYSTEM_ZSWAPIO, update_every, RRDSET_TYPE_AREA);
+
+         // "out" is added with a negative multiplier
+         dim_in = rrddim_add(chart, "in", NULL, page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+         dim_out = rrddim_add(chart, "out", NULL, -page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     rrddim_set_by_pointer(chart, dim_in, zswpin);
+     rrddim_set_by_pointer(chart, dim_out, zswpout);
+     rrdset_done(chart);
+ }
+
+ // --------------------------------------------------------------------
+
+ // KSM copy-on-write chart: drawn whenever the setting is yes; on auto it is
+ // drawn when zero metrics are enabled or either KSM copy counter is non-zero.
+ if(do_ksm == CONFIG_BOOLEAN_YES ||
+    (do_ksm == CONFIG_BOOLEAN_AUTO &&
+     (netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES || cow_ksm || ksm_swpin_copy))) {
+     // promote auto to yes
+     do_ksm = CONFIG_BOOLEAN_YES;
+
+     static RRDSET *chart = NULL;
+     static RRDDIM *dim_swapin = NULL, *dim_write = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!chart)) {
+         // dimensions are scaled by page size / 1024 to match the KiB/s units
+         const long page_bytes = sysconf(_SC_PAGESIZE);
+
+         chart = rrdset_create_localhost(
+             "mem", "ksm_cow", NULL, "ksm", NULL,
+             "KSM Copy On Write Operations", "KiB/s",
+             PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_VMSTAT_NAME,
+             NETDATA_CHART_PRIO_MEM_KSM_COW, update_every, RRDSET_TYPE_LINE);
+
+         dim_swapin = rrddim_add(chart, "swapin", NULL, page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+         dim_write = rrddim_add(chart, "write", NULL, page_bytes, 1024, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     rrddim_set_by_pointer(chart, dim_swapin, ksm_swpin_copy);
+     rrddim_set_by_pointer(chart, dim_write, cow_ksm);
+
+     rrdset_done(chart);
+ }
+
+ // --------------------------------------------------------------------
+
+ if(do_thp == CONFIG_BOOLEAN_YES || do_thp == CONFIG_BOOLEAN_AUTO) {
+
+ // THP page-fault chart: drawn whenever THP monitoring is set to yes; on auto
+ // it is drawn when zero metrics are enabled or a thp_fault_* counter is non-zero.
+ if(do_thp == CONFIG_BOOLEAN_YES ||
+    (do_thp == CONFIG_BOOLEAN_AUTO &&
+     (thp_fault_alloc || thp_fault_fallback || thp_fault_fallback_charge ||
+      netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
+
+     static RRDSET *chart = NULL;
+     static RRDDIM *dim_alloc = NULL, *dim_fallback = NULL, *dim_fallback_charge = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!chart)) {
+         chart = rrdset_create_localhost(
+             "mem", "thp_faults", NULL, "hugepages", NULL,
+             "Transparent Huge Page Fault Allocations", "events/s",
+             PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_VMSTAT_NAME,
+             NETDATA_CHART_PRIO_MEM_HUGEPAGES_FAULTS, update_every, RRDSET_TYPE_LINE);
+
+         // both fallback dimensions are added with a -1 multiplier
+         dim_alloc = rrddim_add(chart, "alloc", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+         dim_fallback = rrddim_add(chart, "fallback", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+         dim_fallback_charge = rrddim_add(chart, "fallback_charge", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     rrddim_set_by_pointer(chart, dim_alloc, thp_fault_alloc);
+     rrddim_set_by_pointer(chart, dim_fallback, thp_fault_fallback);
+     rrddim_set_by_pointer(chart, dim_fallback_charge, thp_fault_fallback_charge);
+
+     rrdset_done(chart);
+ }
+
+ // THP file allocations chart: drawn whenever THP monitoring is set to yes; on
+ // auto it is drawn when zero metrics are enabled or any of this chart's own
+ // thp_file_* counters is non-zero (the thp_fault_* counters belong to the
+ // separate faults chart and must not gate this one).
+ if(do_thp == CONFIG_BOOLEAN_YES || (do_thp == CONFIG_BOOLEAN_AUTO &&
+     (netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES || thp_file_alloc || thp_file_fallback || thp_file_fallback_charge || thp_file_mapped))) {
+
+     static RRDSET *st_thp_file = NULL;
+     static RRDDIM *rd_alloc = NULL, *rd_fallback = NULL, *rd_fallback_charge = NULL, *rd_mapped = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!st_thp_file)) {
+         st_thp_file = rrdset_create_localhost(
+                 "mem"
+                 , "thp_file"
+                 , NULL
+                 , "hugepages"
+                 , NULL
+                 , "Transparent Huge Page File Allocations"
+                 , "events/s"
+                 , PLUGIN_PROC_NAME
+                 , PLUGIN_PROC_MODULE_VMSTAT_NAME
+                 , NETDATA_CHART_PRIO_MEM_HUGEPAGES_FILE
+                 , update_every
+                 , RRDSET_TYPE_LINE
+         );
+
+         // both fallback dimensions are added with a -1 multiplier
+         rd_alloc = rrddim_add(st_thp_file, "alloc", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+         rd_fallback = rrddim_add(st_thp_file, "fallback", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+         rd_mapped = rrddim_add(st_thp_file, "mapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+         rd_fallback_charge = rrddim_add(st_thp_file, "fallback_charge", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     // every dimension is fed from the counter of the same name; in particular
+     // "mapped" takes thp_file_mapped, not the fallback_charge counter
+     rrddim_set_by_pointer(st_thp_file, rd_alloc, thp_file_alloc);
+     rrddim_set_by_pointer(st_thp_file, rd_fallback, thp_file_fallback);
+     rrddim_set_by_pointer(st_thp_file, rd_mapped, thp_file_mapped);
+     rrddim_set_by_pointer(st_thp_file, rd_fallback_charge, thp_file_fallback_charge);
+
+     rrdset_done(st_thp_file);
+ }
+
+ // THP zero-page chart: drawn whenever THP monitoring is set to yes; on auto it
+ // is drawn when zero metrics are enabled or a thp_zero_page_* counter is non-zero.
+ if(do_thp == CONFIG_BOOLEAN_YES ||
+    (do_thp == CONFIG_BOOLEAN_AUTO &&
+     (thp_zero_page_alloc || thp_zero_page_alloc_failed ||
+      netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
+
+     static RRDSET *chart = NULL;
+     static RRDDIM *dim_alloc = NULL, *dim_failed = NULL;
+
+     // the chart and its dimensions are built while the static pointer is still NULL
+     if(unlikely(!chart)) {
+         chart = rrdset_create_localhost(
+             "mem", "thp_zero", NULL, "hugepages", NULL,
+             "Transparent Huge Zero Page Allocations", "events/s",
+             PLUGIN_PROC_NAME, PLUGIN_PROC_MODULE_VMSTAT_NAME,
+             NETDATA_CHART_PRIO_MEM_HUGEPAGES_ZERO, update_every, RRDSET_TYPE_LINE);
+
+         // "failed" is added with a -1 multiplier
+         dim_alloc = rrddim_add(chart, "alloc", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+         dim_failed = rrddim_add(chart, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+     }
+
+     rrddim_set_by_pointer(chart, dim_alloc, thp_zero_page_alloc);
+     rrddim_set_by_pointer(chart, dim_failed, thp_zero_page_alloc_failed);
+
+     rrdset_done(chart);
+ }
+
+ if(do_thp == CONFIG_BOOLEAN_YES || (do_thp == CONFIG_BOOLEAN_AUTO &&
+ (netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES || thp_collapse_alloc || thp_collapse_alloc_failed))) {
+