summaryrefslogtreecommitdiffstats
path: root/collectors/proc.plugin
diff options
context:
space:
mode:
authorCosta Tsaousis <costa@netdata.cloud>2023-07-26 01:06:57 +0300
committerGitHub <noreply@github.com>2023-07-26 01:06:57 +0300
commit065091c3f5d908c5e06ed5ac817999f7dc965b90 (patch)
tree70e29c0d74eaf33f69e9e393732d3d051fd3da5d /collectors/proc.plugin
parentaccc426c8ab08a75c9e6470a4a89482fb4c4eb17 (diff)
proc integrations (#15494)
Co-authored-by: ilyam8 <ilya@netdata.cloud>
Diffstat (limited to 'collectors/proc.plugin')
-rw-r--r--collectors/proc.plugin/multi_metadata.yaml412
-rw-r--r--collectors/proc.plugin/proc_meminfo.c520
-rw-r--r--collectors/proc.plugin/proc_pressure.c137
-rw-r--r--collectors/proc.plugin/proc_pressure.h3
-rw-r--r--collectors/proc.plugin/proc_stat.c2
-rw-r--r--collectors/proc.plugin/proc_vmstat.c12
6 files changed, 894 insertions, 192 deletions
diff --git a/collectors/proc.plugin/multi_metadata.yaml b/collectors/proc.plugin/multi_metadata.yaml
index 04e66ddfe6..bdbce89f98 100644
--- a/collectors/proc.plugin/multi_metadata.yaml
+++ b/collectors/proc.plugin/multi_metadata.yaml
@@ -20,27 +20,50 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides a collection of statistics about the system such as CPU utilization, process counts and more."
+ metrics_description: |
+ CPU utilization, states and frequencies and key Linux system performance metrics.
+
+ The `/proc/stat` file provides various types of system statistics:
+
+ - The overall system CPU usage statistics
+ - Per CPU core statistics
+ - The total context switching of the system
+ - The total number of processes running
+ - The total CPU interrupts
+ - The total CPU softirqs
+
+ The collector also reads:
+
+ - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
+ - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
+ - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
+ - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
auto_detection:
- description: ""
+ description: |
+ The collector auto-detects all metrics. No configuration is needed.
limits:
description: ""
performance_impact:
- description: ""
+ description: |
+ The collector disables cpu frequency and idle state monitoring when there are more than 128 CPU cores available.
setup:
prerequisites:
list: []
configuration:
file:
- name: ""
+ section_name: "plugin:proc:/proc/stat"
+ name: "netdata.conf"
description: ""
options:
description: ""
@@ -187,12 +210,43 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: ""
+ metrics_description: |
+ Entropy, a measure of the randomness or unpredictability of data.
+
+ In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
+ secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
+ vulnerable to attacks that exploit the predictability of the generated keys.
+
+ In most operating systems, entropy is generated by collecting random events from various sources, such as
+ hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
+ of entropy, which is then used to generate random numbers when needed.
+
+ The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
+ to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
+ which blocks until enough entropy is available to generate the requested numbers. This ensures that the
+ generated numbers are truly random and not predictable.
+
+ However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
+ programs that rely on random numbers to slow down or even freeze. This is especially problematic for
+ cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
+
+ To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
+ entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
+ radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
+ software-based sources.
+
+ One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
+ for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
+ high-quality entropy, which can be used to seed the pool of entropy in the operating system.
+
+ Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
+ exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
+ can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -264,12 +318,23 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides the amount of time the system has been up (running)."
+ metrics_description: |
+ The amount of time the system has been up (running).
+
+ Uptime is a critical aspect of overall system performance:
+
+ - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
+ - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
+ - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
+ - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
+ - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
+ - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
+ - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -340,12 +405,33 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides information about process, memory, swap space utilization and many more."
+ metrics_description: |
+ Linux Virtual memory subsystem.
+
+ Information about memory management, indicating how effectively the kernel allocates and frees
+ memory resources in response to system demands.
+
+ Monitors page faults, which occur when a process requests a portion of its memory that isn't
+ immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
+ provide insights into application behavior.
+
+ Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
+ swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
+ a compressed cache for swap pages, and provides insights into its usage and performance implications.
+
+ In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
+ memory resources between host and guest systems.
+
+ For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
+ can impact the performance based on the memory access times.
+
+ The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
+ of memory resources.
method_description: ""
supported_platforms:
- include: []
+ include: [ "linux" ]
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -535,7 +621,37 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor Interrupts metrics for efficient processor interrupt handling."
+ metrics_description: |
+ Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
+ The numbers reported are the counts of the interrupts that have occurred of each type.
+
+ An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
+ immediate attention. The processor then interrupts its current activities and executes the interrupt handler
+ to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
+
+ The types of interrupts include:
+
+ - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
+ you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
+
+ - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
+ used to switch the CPU among different tasks.
+
+ - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
+
+ - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
+
+ Monitoring `/proc/interrupts` can be used for:
+
+ - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
+ configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
+ performance degradation.
+
+ - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
+
+ - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
+ understand what your system is doing. It can provide insights into the system's interaction with hardware,
+ drivers, and other parts of the kernel.
method_description: ""
supported_platforms:
include: []
@@ -621,12 +737,35 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides the system load average for the last 1, 5, and 15 minutes."
+ metrics_description: |
+ The `/proc/loadavg` file provides information about the system load average.
+
+ The load average is a measure of the amount of computational work that a system performs. It is a
+ representation of the average system load over a period of time.
+
+ This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
+ respectively. It also includes the currently running processes and the total number of processes.
+
+ Monitoring the load average can be used for:
+
+ - **System performance**: If the load average is too high, it may indicate that your system is overloaded.
+ On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
+ load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
+ overloaded and tasks are waiting for CPU time.
+
+ - **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
+ due to a runaway process, a software bug, or a hardware issue.
+
+ - **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
+ system's workload. This can help with capacity planning and scaling decisions.
+
+ Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
+ Therefore, high load averages could be due to I/O contention as well as CPU contention.
method_description: ""
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -726,12 +865,35 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration measures system pressure, which can indicate resource shortages in CPU, memory, or I/O."
+ metrics_description: |
+ Introduced in Linux kernel 4.20, `/proc/pressure` provides information about system pressure stall information
+ (PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to
+ resource contention, such as CPU, memory, or I/O.
+
+ The collectors monitored 3 separate files for CPU, memory, and I/O:
+
+ - **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
+ - **memory**: Tracks the amount of time tasks are stalled due to memory contention.
+ - **io**: Tracks the amount of time tasks are stalled due to I/O contention.
+ - **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
+
+ Each of them provides metrics for stall time over the last 10 seconds, 1 minute, 5 minutes, and 15 minutes.
+
+ Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
+
+ - **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
+ frequently being stalled due to lack of resources, which can significantly degrade system performance.
+
+ - **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
+ help identify whether resource contention is the cause.
+
+ - **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
+ utilization and make informed decisions about when to add more resources to your system.
method_description: ""
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -878,7 +1040,29 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor SoftIRQs metrics for efficient software interrupt operations."
+ metrics_description: |
+ In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
+ The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
+
+ Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
+ deferred and processed later in a context where it's safe to enable interrupts.
+
+ The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
+ decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
+ half for too long, which could lead to missed interrupts.
+
+ Monitoring `/proc/softirqs` is useful for:
+
+ - **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
+ rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
+
+ - **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
+ what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem
+ with a disk.
+
+ - **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
+ your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
+ interrupts.
method_description: ""
supported_platforms:
include: []
@@ -963,7 +1147,29 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides statistics on Softnet, such as processed events, dropped events and more."
+ metrics_description: |
+ `/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
+
+ It provides information about:
+
+ - Total number of processed packets (`processed`).
+ - Times ksoftirq ran out of quota (`dropped`).
+ - Times net_rx_action was rescheduled.
+ - Number of times processed all lists before quota.
+ - Number of times did not process all lists due to quota.
+ - Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
+ - Number of times GRO cells were processed.
+
+ Monitoring the /proc/net/softnet_stat file can be useful for:
+
+ - **Network performance monitoring**: By tracking the total number of processed packets and how many packets
+ were dropped, you can gain insights into your system's network performance.
+
+ - **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
+ For instance, a high number of dropped packets may indicate a network problem.
+
+ - **Capacity planning**: If your system is consistently processing near its maximum capacity of network
+ packets, it might be time to consider upgrading your network infrastructure.
method_description: ""
supported_platforms:
include: []
@@ -1069,12 +1275,28 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor memory usage metrics for efficient system memory management."
+ metrics_description: |
+ `/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
+ about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
+ SLAB memory, memory mappings, and more.
+
+ Monitoring /proc/meminfo can be useful for:
+
+ - **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
+ tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
+ from more RAM.
+
+ - **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
+ whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
+ mean that your system is swapping out a lot of memory to disk, which can degrade performance.
+
+ - **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
+ decisions about future capacity needs.
method_description: ""
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -1152,13 +1374,26 @@ modules:
chart_type: area
dimensions:
- name: avail
- - name: system.swap
+ - name: mem.swap
description: System Swap
unit: "MiB"
chart_type: stacked
dimensions:
- name: free
- name: used
+ - name: mem.swap_cached
+ description: Swap Memory Cached in RAM
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: cached
+ - name: mem.zswap
+ description: Zswap Usage
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: in-ram
+ - name: on-disk
- name: mem.hwcorrupt
description: Corrupted Memory detected by ECC
unit: "MiB"
@@ -1198,7 +1433,7 @@ modules:
dimensions:
- name: reclaimable
- name: unreclaimable
- - name: mem.hugepage
+ - name: mem.hugepages
description: Dedicated HugePages Memory
unit: "MiB"
chart_type: stacked
@@ -1207,13 +1442,59 @@ modules:
- name: used
- name: surplus
- name: reserved
- - name: mem.transparent_hugepages
+ - name: mem.thp
description: Transparent HugePages Memory
unit: "MiB"
chart_type: stacked
dimensions:
- name: anonymous
- name: shmem
+ - name: mem.thp_details
+ description: Details of Transparent HugePages Usage
+ unit: "MiB"
+ chart_type: line
+ dimensions:
+ - name: ShmemPmdMapped
+ - name: FileHugePages
+ - name: FilePmdMapped
+ - name: mem.reclaiming
+ description: Memory Reclaiming
+ unit: "MiB"
+ chart_type: line
+ dimensions:
+ - name: Active
+ - name: Inactive
+ - name: Active(anon)
+ - name: Inactive(anon)
+ - name: Active(file)
+ - name: Inactive(file)
+ - name: Unevictable
+ - name: Mlocked
+ - name: mem.high_low
+ description: High and Low Used and Free Memory Areas
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: high_used
+ - name: low_used
+ - name: high_free
+ - name: low_free
+ - name: mem.cma
+ description: Contiguous Memory Allocator (CMA) Memory
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: used
+ - name: free
+ - name: mem.directmaps
+ description: Direct Memory Mappings
+ unit: "MiB"
+ chart_type: stacked
+ dimensions:
+ - name: 4k
+ - name: 2m
+ - name: 4m
+ - name: 1g
- meta:
plugin_name: proc.plugin
module_name: /proc/pagetypeinfo
@@ -1238,7 +1519,7 @@ modules:
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -1326,7 +1607,20 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration monitors system memory errors detected and corrected by ECC RAM."
+ metrics_description: |
+ The Error Detection and Correction (EDAC) subsystem is detecting and reporting errors in the system's memory,
+ primarily ECC (Error-Correcting Code) memory errors.
+
+ The collector provides data for:
+
+ - Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
+ - errors related to a DIMM
+ - errors that cannot be associated with a DIMM
+
+ - Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
+ - memory controllers that can identify the physical DIMMS and report errors directly for them,
+ - memory controllers that report errors for memory address ranges that can be linked to dimms.
+ In this case the DIMMS reported may be more than the physical DIMMS installed.
method_description: ""
supported_platforms:
include: []
@@ -1423,7 +1717,7 @@ modules:
- name: dimm_location
description: Location of the memory module.
- name: dimm_mem_type
- description: Type of the memory module. Usually either buffered or unbuffered memory.
+ description: Type of the memory module.
- name: size
description: The amount of memory in megabytes that this memory module manages.
metrics:
@@ -1453,7 +1747,19 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor NUMA metrics for efficient non-uniform memory access operations."
+ metrics_description: |
+ Information about NUMA (Non-Uniform Memory Access) nodes on the system.
+
+ NUMA is a method of configuring a cluster of microprocessor in a multiprocessing system so that they can
+ share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
+ symmetric multiprocessing (SMP) system.
+
+ In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
+ Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
+ memory in any of the nodes, it does so faster when accessing memory within its own node.
+
+ The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
+ efficiency of memory allocations in multi-node systems.
method_description: ""
supported_platforms:
include: []
@@ -1530,15 +1836,23 @@ modules:
description: ""
keywords:
- ksm
+ - samepage
+ - merging
most_popular: false
overview:
data_collection:
- metrics_description: "Examine KSM metrics for insights into memory deduplication operations."
+ metrics_description: |
+ Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
+ memory of different processes and identify identical pages. It then merges these identical pages into a
+ single page that the processes share. This is particularly useful for virtualization, where multiple virtual
+ machines might be running the same operating system or applications and have many identical pages.
+
+ The collector provides information about the operation and effectiveness of KSM on your system.
method_description: ""
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -1622,7 +1936,11 @@ modules:
most_popular: false
overview:
data_collection:
- metrics_description: "This integration monitors ZRAM usage, compression ratios, and more."
+ metrics_description: |
+ zRAM, or compressed RAM, is a block device that uses a portion of your system's RAM as a block device.
+ The data written to this block device is compressed and stored in memory.
+
+ The collectors provides information about the operation and the effectiveness of zRAM on your system.
method_description: ""
supported_platforms:
include: []
@@ -1715,15 +2033,30 @@ modules:
keywords:
- ipc
- semaphores
+ - shared memory
most_popular: false
overview:
data_collection:
- metrics_description: "Monitor Inter Process Communication performance for optimal process interaction."
+ metrics_description: |
+ IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
+ other and synchronize their actions.
+
+ This collector exposes information about:
+
+ - Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
+ allows messages to be placed onto a queue and read at a later time.
+
+ - Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
+ reading/writing into shared memory segments.
+
+ - Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
+ processes are trying to access a single shared resource, semaphores can ensure that only one process
+ accesses the resource at a given time.
method_description: ""
supported_platforms:
include: []
exclude: []
- multi_instance: true
+ multi_instance: false
additional_permissions:
description: ""
default_behavior:
@@ -1828,13 +2161,18 @@ modules:
description: ""
keywords:
- disk
+ - disks
- io
- bcache
- block devices
most_popular: false
overview:
data_collection:
- metrics_description: "This integration provides statistics about disk and Bcache I/O."
+ metrics_description: |
+ Detailed statistics for each of your system's disk devices and partitions.
+ The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
+
+ Get valuable insight into how your disks are performing and where potential bottlenecks might be.
method_description: ""
supported_platforms:
include: []
diff --git a/collectors/proc.plugin/proc_meminfo.c b/collectors/proc.plugin/proc_meminfo.c
index 6988c70e01..cd1ba872c7 100644
--- a/collectors/proc.plugin/proc_meminfo.c
+++ b/collectors/proc.plugin/proc_meminfo.c
@@ -9,58 +9,92 @@ int do_proc_meminfo(int update_every, usec_t dt) {
(void)dt;
static procfile *ff = NULL;
- static int do_ram = -1, do_swap = -1, do_hwcorrupt = -1, do_committed = -1, do_writeback = -1, do_kernel = -1, do_slab = -1, do_hugepages = -1, do_transparent_hugepages = -1;
- static int do_percpu = 0;
+ static int do_ram = -1
+ , do_swap = -1
+ , do_hwcorrupt = -1
+ , do_committed = -1
+ , do_writeback = -1
+ , do_kernel = -1
+ , do_slab = -1
+ , do_hugepages = -1
+ , do_transparent_hugepages = -1
+ , do_reclaiming = -1
+ , do_high_low = -1
+ , do_cma = -1
+ , do_directmap = -1;
static ARL_BASE *arl_base = NULL;
- static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL;
+ static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL,
+ *arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL,
+ *arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL;
static unsigned long long
- MemTotal = 0,
- MemFree = 0,
- MemAvailable = 0,
- Buffers = 0,
- Cached = 0,
- //SwapCached = 0,
- //Active = 0,
- //Inactive = 0,
- //ActiveAnon = 0,
- //InactiveAnon = 0,
- //ActiveFile = 0,
- //InactiveFile = 0,
- //Unevictable = 0,
- //Mlocked = 0,
- SwapTotal = 0,
- SwapFree = 0,
- Dirty = 0,
- Writeback = 0,
- //AnonPages = 0,
- //Mapped = 0,
- Shmem = 0,
- Slab = 0,
- SReclaimable = 0,
- SUnreclaim = 0,
- KernelStack = 0,
- PageTables = 0,
- NFS_Unstable = 0,
- Bounce = 0,
- WritebackTmp = 0,
- //CommitLimit = 0,
- Committed_AS = 0,
- //VmallocTotal = 0,
- VmallocUsed = 0,
- //VmallocChunk = 0,
- Percpu = 0,
- AnonHugePages = 0,
- ShmemHugePages = 0,
- HugePages_Total = 0,
- HugePages_Free = 0,
- HugePages_Rsvd = 0,
- HugePages_Surp = 0,
- Hugepagesize = 0,
- //DirectMap4k = 0,
- //DirectMap2M = 0,
- HardwareCorrupted = 0;
+ MemTotal = 0
+ , MemFree = 0
+ , MemAvailable = 0
+ , Buffers = 0
+ , Cached = 0
+ , SwapCached = 0
+ , Active = 0
+ , Inactive = 0
+ , ActiveAnon = 0
+ , InactiveAnon = 0
+ , ActiveFile = 0
+ , InactiveFile = 0
+ , Unevictable = 0
+ , Mlocked = 0
+ , HighTotal = 0
+ , HighFree = 0
+ , LowTotal = 0
+ , LowFree = 0
+ , MmapCopy = 0
+ , SwapTotal = 0
+ , SwapFree = 0
+ , Zswap = 0
+ , Zswapped = 0
+ , Dirty = 0
+ , Writeback = 0
+ , AnonPages = 0
+ , Mapped = 0
+ , Shmem = 0
+ , KReclaimable = 0
+ , Slab = 0
+ , SReclaimable = 0
+ , SUnreclaim = 0
+ , KernelStack = 0
+ , ShadowCallStack = 0
+ , PageTables = 0
+ , SecPageTables = 0
+ , NFS_Unstable = 0
+ , Bounce = 0
+ , WritebackTmp = 0
+ , CommitLimit = 0
+ , Committed_AS = 0
+ , VmallocTotal = 0
+ , VmallocUsed = 0
+ , VmallocChunk = 0
+ , Percpu = 0
+ //, EarlyMemtestBad = 0
+ , HardwareCorrupted = 0
+ , AnonHugePages = 0
+ , ShmemHugePages = 0
+ , ShmemPmdMapped = 0
+ , FileHugePages = 0
+ , FilePmdMapped = 0
+ , CmaTotal = 0