summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorClement Tsang <34804052+ClementTsang@users.noreply.github.com>2023-05-21 00:53:27 -0400
committerGitHub <noreply@github.com>2023-05-21 00:53:27 -0400
commitcaa1d1b88b7f1d3046d600b925fa357c20327be4 (patch)
treee42e24788a1ea3326a37502c3945257621d5055d
parent8a49c492673fa0c7f01e4a3c8cc04cd4c4725635 (diff)
refactor: remove procfs (#1163)
* refactor: remove procfs, use personal impls/rustix directly
* buffer sharing
* inline
* some cleanup
-rw-r--r--.gitignore2
-rw-r--r--Cargo.lock53
-rw-r--r--Cargo.toml2
-rw-r--r--src/app/data_harvester/processes/linux.rs99
-rw-r--r--src/app/data_harvester/processes/linux/process.rs291
-rw-r--r--src/utils/error.rs27
6 files changed, 357 insertions, 117 deletions
diff --git a/.gitignore b/.gitignore
index b386e452..3e337170 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,4 +38,4 @@ site/
# dhat heap profiling
dhat-heap.json
-dhat/ \ No newline at end of file
+dhat/
diff --git a/Cargo.lock b/Cargo.lock
index 509f8fa0..151c8bcc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -165,9 +165,9 @@ dependencies = [
"nvml-wrapper",
"once_cell",
"predicates",
- "procfs",
"ratatui",
"regex",
+ "rustix",
"serde",
"serde_json",
"starship-battery",
@@ -575,12 +575,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286"
[[package]]
-name = "hex"
-version = "0.4.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
-
-[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -631,7 +625,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f"
dependencies = [
"hermit-abi 0.3.1",
"io-lifetimes",
- "rustix 0.37.19",
+ "rustix",
"windows-sys 0.48.0",
]
@@ -660,12 +654,6 @@ dependencies = [
]
[[package]]
-name = "lazy_static"
-version = "1.4.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
-
-[[package]]
name = "lazycell"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -689,12 +677,6 @@ dependencies = [
[[package]]
name = "linux-raw-sys"
-version = "0.1.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4"
-
-[[package]]
-name = "linux-raw-sys"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f"
@@ -917,19 +899,6 @@ dependencies = [
]
[[package]]
-name = "procfs"
-version = "0.15.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "943ca7f9f29bab5844ecd8fdb3992c5969b6622bb9609b9502fef9b4310e3f1f"
-dependencies = [
- "bitflags",
- "byteorder",
- "hex",
- "lazy_static",
- "rustix 0.36.13",
-]
-
-[[package]]
name = "quote"
version = "1.0.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1030,20 +999,6 @@ checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
-version = "0.36.13"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a38f9520be93aba504e8ca974197f46158de5dcaa9fa04b57c57cd6a679d658"
-dependencies = [
- "bitflags",
- "errno",
- "io-lifetimes",
- "libc",
- "linux-raw-sys 0.1.4",
- "windows-sys 0.45.0",
-]
-
-[[package]]
-name = "rustix"
version = "0.37.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d"
@@ -1052,7 +1007,7 @@ dependencies = [
"errno",
"io-lifetimes",
"libc",
- "linux-raw-sys 0.3.7",
+ "linux-raw-sys",
"windows-sys 0.48.0",
]
@@ -1245,7 +1200,7 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237"
dependencies = [
- "rustix 0.37.19",
+ "rustix",
"windows-sys 0.48.0",
]
diff --git a/Cargo.toml b/Cargo.toml
index aeeceee0..8bf1fa13 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -110,7 +110,7 @@ unicode-width = "0.1.10"
libc = "0.2.144"
[target.'cfg(target_os = "linux")'.dependencies]
-procfs = { version = "0.15.1", default-features = false }
+rustix = { version = "0.37.19", features = ["fs", "param", "process"] }
[target.'cfg(target_os = "macos")'.dependencies]
core-foundation = "0.9.3"
diff --git a/src/app/data_harvester/processes/linux.rs b/src/app/data_harvester/processes/linux.rs
index 78d022d5..b3bfb885 100644
--- a/src/app/data_harvester/processes/linux.rs
+++ b/src/app/data_harvester/processes/linux.rs
@@ -1,11 +1,13 @@
//! Process data collection for Linux.
-use std::fs::File;
+mod process;
+use process::*;
+
+use std::fs::{self, File};
use std::io::{BufRead, BufReader};
use std::time::Duration;
use hashbrown::{HashMap, HashSet};
-use procfs::process::{Process, Stat};
use sysinfo::{ProcessStatus, System};
use super::{ProcessHarvest, UserTable};
@@ -116,14 +118,21 @@ fn get_linux_cpu_usage(
}
fn read_proc(
- prev_proc: &PrevProcDetails, process: &Process, cpu_usage: f64, cpu_fraction: f64,
+ prev_proc: &PrevProcDetails, process: Process, cpu_usage: f64, cpu_fraction: f64,
use_current_cpu_total: bool, time_difference_in_secs: u64, total_memory: u64,
user_table: &mut UserTable,
) -> error::Result<(ProcessHarvest, u64)> {
- let stat = process.stat()?;
+ let Process {
+ pid: _,
+ uid,
+ stat,
+ io,
+ cmdline,
+ } = process;
+
let (command, name) = {
let truncated_name = stat.comm.as_str();
- if let Ok(cmdline) = process.cmdline() {
+ if let Ok(cmdline) = cmdline {
if cmdline.is_empty() {
(format!("[{}]", truncated_name), truncated_name.to_string())
} else {
@@ -169,7 +178,7 @@ fn read_proc(
// This can fail if permission is denied!
let (total_read_bytes, total_write_bytes, read_bytes_per_sec, write_bytes_per_sec) =
- if let Ok(io) = process.io() {
+ if let Ok(io) = io {
let total_read_bytes = io.read_bytes;
let total_write_bytes = io.write_bytes;
let prev_total_read_bytes = prev_proc.total_read_bytes;
@@ -195,9 +204,16 @@ fn read_proc(
(0, 0, 0, 0)
};
- let uid = process.uid()?;
+ let user = uid
+ .and_then(|uid| {
+ user_table
+ .get_uid_to_username_mapping(uid)
+ .map(Into::into)
+ .ok()
+ })
+ .unwrap_or_else(|| "N/A".into());
- let time = if let Ok(ticks_per_sec) = u32::try_from(procfs::ticks_per_second()) {
+ let time = if let Ok(ticks_per_sec) = u32::try_from(rustix::param::clock_ticks_per_second()) {
if ticks_per_sec == 0 {
Duration::ZERO
} else {
@@ -221,11 +237,8 @@ fn read_proc(
total_read_bytes,
total_write_bytes,
process_state,
- uid: Some(uid),
- user: user_table
- .get_uid_to_username_mapping(uid)
- .map(Into::into)
- .unwrap_or_else(|_| "N/A".into()),
+ uid,
+ user,
time,
},
new_process_times,
@@ -242,6 +255,10 @@ pub(crate) struct ProcHarvestOptions {
pub unnormalized_cpu: bool,
}
+/// Returns `true` if `s` is non-empty and consists solely of ASCII digits (`0`-`9`).
+///
+/// Used to pick the per-process `<PID>` directories out of the other entries in `/proc`.
+/// The empty string is rejected explicitly: `all` is vacuously true on an empty iterator,
+/// and an empty name can never be a PID.
+fn is_str_numeric(s: &str) -> bool {
+    !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
+}
+
pub(crate) fn get_process_data(
sys: &System, prev_proc: PrevProc<'_>, pid_mapping: &mut HashMap<Pid, PrevProcDetails>,
proc_harvest_options: ProcHarvestOptions, time_difference_in_secs: u64, total_memory: u64,
@@ -275,32 +292,36 @@ pub(crate) fn get_process_data(
let mut pids_to_clear: HashSet<Pid> = pid_mapping.keys().cloned().collect();
- let process_vector: Vec<ProcessHarvest> = std::fs::read_dir("/proc")?
- .filter_map(|dir| {
- if let Ok(dir) = dir {
- if let Ok(pid) = dir.file_name().to_string_lossy().trim().parse::<Pid>() {
- let Ok(process) = Process::new(pid) else {
- return None;
- };
- let prev_proc_details = pid_mapping.entry(pid).or_default();
-
- if let Ok((process_harvest, new_process_times)) = read_proc(
- prev_proc_details,
- &process,
- cpu_usage,
- cpu_fraction,
- use_current_cpu_total,
- time_difference_in_secs,
- total_memory,
- user_table,
- ) {
- prev_proc_details.cpu_time = new_process_times;
- prev_proc_details.total_read_bytes = process_harvest.total_read_bytes;
- prev_proc_details.total_write_bytes = process_harvest.total_write_bytes;
-
- pids_to_clear.remove(&pid);
- return Some(process_harvest);
- }
+ let pids = fs::read_dir("/proc")?.flatten().filter_map(|dir| {
+ if is_str_numeric(dir.file_name().to_string_lossy().trim()) {
+ Some(dir.path())
+ } else {
+ None
+ }
+ });
+
+ let process_vector: Vec<ProcessHarvest> = pids
+ .filter_map(|pid_path| {
+ if let Ok(process) = Process::from_path(pid_path) {
+ let pid = process.pid;
+ let prev_proc_details = pid_mapping.entry(pid).or_default();
+
+ if let Ok((process_harvest, new_process_times)) = read_proc(
+ prev_proc_details,
+ process,
+ cpu_usage,
+ cpu_fraction,
+ use_current_cpu_total,
+ time_difference_in_secs,
+ total_memory,
+ user_table,
+ ) {
+ prev_proc_details.cpu_time = new_process_times;
+ prev_proc_details.total_read_bytes = process_harvest.total_read_bytes;
+ prev_proc_details.total_write_bytes = process_harvest.total_write_bytes;
+
+ pids_to_clear.remove(&pid);
+ return Some(process_harvest);
}
}
diff --git a/src/app/data_harvester/processes/linux/process.rs b/src/app/data_harvester/processes/linux/process.rs
new file mode 100644
index 00000000..8889c697
--- /dev/null
+++ b/src/app/data_harvester/processes/linux/process.rs
@@ -0,0 +1,291 @@
+//! Linux process code for getting process data via `/proc/`.
+//! Based on the [procfs](https://github.com/eminence/procfs) crate.
+
+use std::{
+ fs::File,
+ io::{self, BufRead, BufReader, Read},
+ path::PathBuf,
+};
+
+use anyhow::anyhow;
+use libc::uid_t;
+use once_cell::sync::Lazy;
+use rustix::{
+ fd::OwnedFd,
+ fs::{Mode, OFlags},
+ path::Arg,
+};
+
+use crate::Pid;
+
+/// The value of `rustix::param::page_size()`, computed lazily on first access and then cached.
+/// Used by [`Stat::rss_bytes`] to turn the page count in `Stat::rss` into bytes.
+static PAGESIZE: Lazy<u64> = Lazy::new(|| rustix::param::page_size() as u64);
+
+/// Pulls the next token out of `iter`.
+///
+/// Returns an [`io::Error`] of kind [`io::ErrorKind::InvalidData`] when the iterator is
+/// already exhausted, so callers can propagate "field missing" with `?`.
+#[inline]
+fn next_part<'a>(iter: &mut impl Iterator<Item = &'a str>) -> Result<&'a str, io::Error> {
+    match iter.next() {
+        Some(part) => Ok(part),
+        None => Err(io::ErrorKind::InvalidData.into()),
+    }
+}
+
+/// A wrapper around the data in `/proc/<PID>/stat`. For documentation, see [here](https://man7.org/linux/man-pages/man5/proc.5.html).
+///
+/// Note this does not necessarily get all fields, only the ones we use in bottom.
+/// Instances are produced by `Stat::from_file`, which parses the contents of the stat file.
+pub(crate) struct Stat {
+ /// The filename of the executable without parentheses.
+ pub comm: String,
+
+ /// The current process state, represented by a char.
+ pub state: char,
+
+ /// The parent process PID.
+ pub ppid: Pid,
+
+ /// The amount of time this process has been scheduled in user mode in clock ticks.
+ pub utime: u64,
+
+ /// The amount of time this process has been scheduled in kernel mode in clock ticks.
+ pub stime: u64,
+
+ /// The resident set size, or the number of pages the process has in real memory.
+ /// Use [`Stat::rss_bytes`] to get this value in bytes.
+ pub rss: u64,
+}
+
+impl Stat {
+    /// Reads and parses the contents of a `/proc/<PID>/stat` file.
+    ///
+    /// `buffer` is scratch space whose allocation is reused across files. The file's bytes are
+    /// appended to whatever it already holds, so callers should pass it in cleared. On return
+    /// the buffer is empty again, with its capacity retained.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be read, if the parenthesised `comm` field cannot
+    /// be located, or if any required field is missing or fails to parse.
+    #[inline]
+    fn from_file(mut f: File, buffer: &mut String) -> anyhow::Result<Stat> {
+        // The file may contain non-UTF-8 bytes, so it must not be read straight into a `String`:
+        // writing arbitrary bytes through `String::as_mut_vec` breaks the UTF-8 invariant that
+        // every later `str` operation relies on. Borrow the allocation as plain bytes instead
+        // and convert lossily before parsing.
+        let mut bytes = std::mem::take(buffer).into_bytes();
+
+        let stat = match f.read_to_end(&mut bytes) {
+            Ok(_) => Self::parse(&String::from_utf8_lossy(&bytes)),
+            Err(err) => Err(err.into()),
+        };
+
+        // Hand the allocation back to the caller. An empty vector is always valid UTF-8, so the
+        // fallback to a fresh `String` is never expected to be taken.
+        bytes.clear();
+        *buffer = String::from_utf8(bytes).unwrap_or_default();
+
+        stat
+    }
+
+    /// Parses the text of a `/proc/<PID>/stat` file into a [`Stat`].
+    fn parse(line: &str) -> anyhow::Result<Stat> {
+        let line = line.trim();
+
+        // `comm` is wrapped in parentheses but may itself contain parentheses and spaces, so it
+        // runs from the first `(` to the *last* `)`; every field after it is a plain token.
+        let start_paren = line
+            .find('(')
+            .ok_or_else(|| anyhow!("start paren missing"))?;
+        let end_paren = line
+            .rfind(')')
+            .ok_or_else(|| anyhow!("end paren missing"))?;
+
+        // Checked slicing: malformed input becomes an error instead of a panic.
+        let comm = line
+            .get(start_paren + 1..end_paren)
+            .ok_or_else(|| anyhow!("malformed comm"))?
+            .to_string();
+
+        // Skip the closing paren and the single space that follows it.
+        let fields = line
+            .get(end_paren + 2..)
+            .ok_or_else(|| anyhow!("missing fields after comm"))?;
+
+        let mut fields = fields.split(' ');
+        let state = next_part(&mut fields)?
+            .chars()
+            .next()
+            .ok_or_else(|| anyhow!("missing state"))?;
+
+        let ppid: Pid = next_part(&mut fields)?.parse()?;
+
+        // Skip 9 fields until utime (pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt).
+        let mut fields = fields.skip(9);
+
+        let utime: u64 = next_part(&mut fields)?.parse()?;
+        let stime: u64 = next_part(&mut fields)?.parse()?;
+
+        // Skip 8 fields until rss (cutime, cstime, priority, nice, num_threads, itrealvalue, starttime, vsize).
+        let mut fields = fields.skip(8);
+
+        let rss: u64 = next_part(&mut fields)?.parse()?;
+
+        Ok(Stat {
+            comm,
+            state,
+            ppid,
+            utime,
+            stime,
+            rss,
+        })
+    }
+
+    /// Returns the Resident Set Size in bytes (`rss` pages multiplied by the page size).
+    #[inline]
+    pub fn rss_bytes(&self) -> u64 {
+        self.rss * *PAGESIZE
+    }
+}
+
+/// A wrapper around the data in `/proc/<PID>/io`.
+///
+/// Note this does not necessarily get all fields, only the ones we use in bottom.
+pub(crate) struct Io {
+ /// The value of the `read_bytes:` entry of the io file (0 if the entry was not found).
+ pub read_bytes: u64,
+ /// The value of the `write_bytes:` entry of the io file (0 if the entry was not found).
+ pub write_bytes: u64,
+}
+
+impl Io {
+    /// Parses the `read_bytes` and `write_bytes` entries out of a `/proc/<PID>/io` file.
+    ///
+    /// `buffer` is a scratch line buffer; callers should pass it in cleared. It is cleared
+    /// between lines, but may still hold the last line read when this function returns.
+    ///
+    /// Entries that never show up are reported as `0`. A failing read ends parsing early and
+    /// keeps whatever was parsed up to that point.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the value of a tracked entry is not a valid `u64`.
+    #[inline]
+    fn from_file(f: File, buffer: &mut String) -> anyhow::Result<Io> {
+        // Must equal the number of variants in `Fields`; update this if you want more fields!
+        const NUM_FIELDS: u16 = 2;
+        enum Fields {
+            ReadBytes,
+            WriteBytes,
+        }
+
+        let mut read_fields: u16 = 0;
+        let mut reader = BufReader::new(f);
+
+        let mut read_bytes = 0;
+        let mut write_bytes = 0;
+
+        // This saves us from doing a string allocation on each iteration compared to `lines()`.
+        while let Ok(bytes) = reader.read_line(buffer) {
+            if bytes == 0 {
+                // End of file.
+                break;
+            }
+
+            let mut parts = buffer.split_whitespace();
+
+            if let Some(field) = parts.next() {
+                let curr_field = match field {
+                    "read_bytes:" => Fields::ReadBytes,
+                    "write_bytes:" => Fields::WriteBytes,
+                    _ => {
+                        // Not a field we track; move on to the next line.
+                        buffer.clear();
+                        continue;
+                    }
+                };
+
+                if let Some(value) = parts.next() {
+                    let value = value.parse::<u64>()?;
+                    match curr_field {
+                        Fields::ReadBytes => read_bytes = value,
+                        Fields::WriteBytes => write_bytes = value,
+                    }
+                    read_fields += 1;
+                }
+            }
+
+            // Quick short circuit if we read all required fields.
+            if read_fields == NUM_FIELDS {
+                break;
+            }
+
+            buffer.clear();
+        }
+
+        Ok(Io {
+            read_bytes,
+            write_bytes,
+        })
+    }
+}
+
+/// A wrapper around a Linux process's data, read from `/proc/<PID>`.
+///
+/// Core documentation based on [proc's manpages](https://man7.org/linux/man-pages/man5/proc.5.html).
+pub(crate) struct Process {
+ /// The process ID, derived from the `/proc/<PID>` path the process was created from.
+ pub pid: Pid,
+ /// The `st_uid` reported by `fstat` on the process directory, or `None` if that call failed.
+ pub uid: Option<uid_t>,
+ /// The parsed contents of `/proc/<PID>/stat`.
+ pub stat: Stat,
+ /// The parsed contents of `/proc/<PID>/io`, or the error hit while opening or parsing it.
+ pub io: anyhow::Result<Io>,
+ /// The contents of `/proc/<PID>/cmdline` split on NUL bytes, or the error hit while opening it.
+ pub cmdline: anyhow::Result<Vec<String>>,
+}
+
+/// Prepares the shared state for the next `/proc/<PID>` file: empties the scratch `buffer` and
+/// removes the last component (the most recently pushed file name) from `root`.
+#[inline]
+fn reset(root: &mut PathBuf, buffer: &mut String) {
+    buffer.clear();
+    root.pop();
+}
+
+impl Process {
+    /// Builds a [`Process`] snapshot from a `/proc/<PID>` directory path.
+    ///
+    /// All fields are read eagerly, right here, so the data reflects the moment of creation and
+    /// may be stale by the time it is used. This type is therefore best suited to values that
+    /// rarely change or to short-lived snapshots that are discarded quickly.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the directory cannot be opened (e.g. the process is gone or permissions are
+    /// missing), if no PID can be derived from the path, or if `stat` cannot be opened or
+    /// parsed. Failures for `io` and `cmdline` are not fatal; they are stored in the
+    /// corresponding fields instead.
+    pub(crate) fn from_path(pid_path: PathBuf) -> anyhow::Result<Process> {
+        // TODO: Pass in a buffer vec/string to share?
+
+        let dir_fd = rustix::fs::openat(
+            rustix::fs::cwd(),
+            &pid_path,
+            OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
+            Mode::empty(),
+        )?;
+
+        // Prefer the final path component as the PID. If it is not numeric, try treating the
+        // path as a symlink and parse the link target instead.
+        let pid_from_name = pid_path
+            .as_path()
+            .components()
+            .last()
+            .and_then(|part| part.to_string_lossy().parse::<Pid>().ok());
+        let pid = match pid_from_name {
+            Some(pid) => pid,
+            None => rustix::fs::readlinkat(rustix::fs::cwd(), &pid_path, vec![])
+                .ok()
+                .and_then(|target| target.to_string_lossy().parse::<Pid>().ok())
+                .ok_or_else(|| anyhow!("PID for {pid_path:?} was not found"))?,
+        };
+
+        // Best effort: a failing `fstat` simply leaves the UID unknown.
+        let uid = rustix::fs::fstat(&dir_fd).ok().map(|md| md.st_uid);
+
+        let mut root = pid_path;
+        let mut buffer = String::new();
+
+        // NB: Whenever you add a new stat, make sure to pop the root and clear the buffer!
+        let stat_file = open_at(&mut root, "stat", &dir_fd)?;
+        let stat = Stat::from_file(stat_file, &mut buffer)?;
+        reset(&mut root, &mut buffer);
+
+        let cmdline = cmdline(&mut root, &dir_fd, &mut buffer);
+        reset(&mut root, &mut buffer);
+
+        // Last file read, so there is no need to reset afterwards.
+        let io = match open_at(&mut root, "io", &dir_fd) {
+            Ok(file) => Io::from_file(file, &mut buffer),
+            Err(err) => Err(err),
+        };
+
+        Ok(Process {
+            pid,
+            uid,
+            stat,
+            io,
+            cmdline,
+        })
+    }
+}
+
+/// Reads `cmdline` for the process rooted at `root` and splits it into its NUL-separated
+/// arguments, dropping empty pieces.
+///
+/// Returns an error only if the file cannot be opened. `buffer` receives the raw file contents
+/// and `root` keeps the pushed `cmdline` component, so callers should reset both afterwards.
+#[inline]
+fn cmdline(root: &mut PathBuf, fd: &OwnedFd, buffer: &mut String) -> anyhow::Result<Vec<String>> {
+    let mut file = open_at(root, "cmdline", fd)?;
+
+    // NOTE(review): the outcome of the read is deliberately discarded here, so a failed read
+    // is indistinguishable from an empty command line - confirm this is intended.
+    let _ = file.read_to_string(buffer);
+
+    let args = buffer
+        .split('\0')
+        .filter(|arg| !arg.is_empty())
+        .map(str::to_string)
+        .collect::<Vec<_>>();
+
+    Ok(args)
+}
+
+/// Pushes `child` onto `root` and opens the resulting path read-only (with `CLOEXEC`) through
+/// `rustix::fs::openat`, passing `fd` as the directory descriptor.
+///
+/// `root` is mutated in place to avoid allocations, and the pushed component is left on it.
+/// You will probably want to pop it again before reusing `root` for another file.
+#[inline]
+fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result<File> {
+    root.push(child);
+
+    let file = rustix::fs::openat(fd, &*root, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())
+        .map(File::from)?;
+
+    Ok(file)
+}
diff --git a/src/utils/error.rs b/src/utils/error.rs
index a0abff33..bf6a5b38 100644
--- a/src/utils/error.rs
+++ b/src/utils/error.rs
@@ -2,9 +2,6 @@ use std::{borrow::Cow, result};
use thiserror::Error;
-#[cfg(target_os = "linux")]
-use procfs::ProcError;
-
/// A type alias for handling errors related to Bottom.
pub type Result<T> = result::Result<T, BottomError>;
@@ -37,10 +34,6 @@ pub enum BottomError {
MinorError,
#[error("Error casting integers {0}")]
TryFromIntError(#[from] std::num::TryFromIntError),
- /// An error to represent errors with procfs
- #[cfg(target_os = "linux")]
- #[error("Procfs error, {0}")]
- ProcfsError(String),
}
impl From<std::io::Error> for BottomError {
@@ -95,23 +88,3 @@ impl From<regex::Error> for BottomError {
BottomError::QueryError(format!("Regex error: {}", error.last().unwrap_or(&"")).into())
}
}
-
-#[cfg(target_os = "linux")]
-impl From<ProcError> for BottomError {
- fn from(err: ProcError) -> Self {
- match err {
- ProcError::PermissionDenied(p) => {
- BottomError::ProcfsError(format!("Permission denied for {:?}", p))
- }
- ProcError::NotFound(p) => BottomError::ProcfsError(format!("{:?} not found", p)),
- ProcError::Incomplete(p) => BottomError::ProcfsError(format!("{:?} incomplete", p)),
- ProcError::Io(e, p) => {
- BottomError::ProcfsError(format!("io error: {:?} for {:?}", e, p))
- }
- ProcError::Other(s) => BottomError::ProcfsError(format!("Other procfs error: {}", s)),
- ProcError::InternalError(e) => {
- BottomError::ProcfsError(format!("procfs internal error: {:?}", e))
- }
- }
- }
-}