From caa1d1b88b7f1d3046d600b925fa357c20327be4 Mon Sep 17 00:00:00 2001 From: Clement Tsang <34804052+ClementTsang@users.noreply.github.com> Date: Sun, 21 May 2023 00:53:27 -0400 Subject: refactor: remove procfs (#1163) * refactor: remove procfs, use personal impls/rustix directly * buffer sharing * inline * some cleanup --- .gitignore | 2 +- Cargo.lock | 53 +--- Cargo.toml | 2 +- src/app/data_harvester/processes/linux.rs | 99 +++++--- src/app/data_harvester/processes/linux/process.rs | 291 ++++++++++++++++++++++ src/utils/error.rs | 27 -- 6 files changed, 357 insertions(+), 117 deletions(-) create mode 100644 src/app/data_harvester/processes/linux/process.rs diff --git a/.gitignore b/.gitignore index b386e452..3e337170 100644 --- a/.gitignore +++ b/.gitignore @@ -38,4 +38,4 @@ site/ # dhat heap profiling dhat-heap.json -dhat/ \ No newline at end of file +dhat/ diff --git a/Cargo.lock b/Cargo.lock index 509f8fa0..151c8bcc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -165,9 +165,9 @@ dependencies = [ "nvml-wrapper", "once_cell", "predicates", - "procfs", "ratatui", "regex", + "rustix", "serde", "serde_json", "starship-battery", @@ -574,12 +574,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - [[package]] name = "humantime" version = "2.1.0" @@ -631,7 +625,7 @@ checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" dependencies = [ "hermit-abi 0.3.1", "io-lifetimes", - "rustix 0.37.19", + "rustix", "windows-sys 0.48.0", ] @@ -659,12 +653,6 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "lazycell" version = "1.3.0" @@ -687,12 +675,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "linux-raw-sys" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" - [[package]] name = "linux-raw-sys" version = "0.3.7" @@ -916,19 +898,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "procfs" -version = "0.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "943ca7f9f29bab5844ecd8fdb3992c5969b6622bb9609b9502fef9b4310e3f1f" -dependencies = [ - "bitflags", - "byteorder", - "hex", - "lazy_static", - "rustix 0.36.13", -] - [[package]] name = "quote" version = "1.0.27" @@ -1028,20 +997,6 @@ version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" -[[package]] -name = "rustix" -version = "0.36.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a38f9520be93aba504e8ca974197f46158de5dcaa9fa04b57c57cd6a679d658" -dependencies = [ - "bitflags", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.1.4", - "windows-sys 0.45.0", -] - [[package]] name = "rustix" version = "0.37.19" @@ -1052,7 +1007,7 @@ dependencies = [ "errno", "io-lifetimes", "libc", - "linux-raw-sys 0.3.7", + "linux-raw-sys", "windows-sys 0.48.0", ] @@ -1245,7 +1200,7 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" dependencies = [ - "rustix 0.37.19", + "rustix", "windows-sys 0.48.0", ] diff --git a/Cargo.toml b/Cargo.toml index aeeceee0..8bf1fa13 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -110,7 +110,7 @@ unicode-width = "0.1.10" libc = "0.2.144" [target.'cfg(target_os = "linux")'.dependencies] -procfs = { version = "0.15.1", default-features = false } +rustix = { version = "0.37.19", features = ["fs", "param", "process"] } [target.'cfg(target_os = "macos")'.dependencies] core-foundation = "0.9.3" diff --git a/src/app/data_harvester/processes/linux.rs b/src/app/data_harvester/processes/linux.rs index 78d022d5..b3bfb885 100644 --- a/src/app/data_harvester/processes/linux.rs +++ b/src/app/data_harvester/processes/linux.rs @@ -1,11 +1,13 @@ //! Process data collection for Linux. -use std::fs::File; +mod process; +use process::*; + +use std::fs::{self, File}; use std::io::{BufRead, BufReader}; use std::time::Duration; use hashbrown::{HashMap, HashSet}; -use procfs::process::{Process, Stat}; use sysinfo::{ProcessStatus, System}; use super::{ProcessHarvest, UserTable}; @@ -116,14 +118,21 @@ fn get_linux_cpu_usage( } fn read_proc( - prev_proc: &PrevProcDetails, process: &Process, cpu_usage: f64, cpu_fraction: f64, + prev_proc: &PrevProcDetails, process: Process, cpu_usage: f64, cpu_fraction: f64, use_current_cpu_total: bool, time_difference_in_secs: u64, total_memory: u64, user_table: &mut UserTable, ) -> error::Result<(ProcessHarvest, u64)> { - let stat = process.stat()?; + let Process { + pid: _, + uid, + stat, + io, + cmdline, + } = process; + let (command, name) = { let truncated_name = stat.comm.as_str(); - if let Ok(cmdline) = process.cmdline() { + if let Ok(cmdline) = cmdline { if cmdline.is_empty() { (format!("[{}]", truncated_name), truncated_name.to_string()) } else { @@ -169,7 +178,7 @@ fn read_proc( // This can fail if permission is denied! let (total_read_bytes, total_write_bytes, read_bytes_per_sec, write_bytes_per_sec) = - if let Ok(io) = process.io() { + if let Ok(io) = io { let total_read_bytes = io.read_bytes; let total_write_bytes = io.write_bytes; let prev_total_read_bytes = prev_proc.total_read_bytes; @@ -195,9 +204,16 @@ fn read_proc( (0, 0, 0, 0) }; - let uid = process.uid()?; + let user = uid + .and_then(|uid| { + user_table + .get_uid_to_username_mapping(uid) + .map(Into::into) + .ok() + }) + .unwrap_or_else(|| "N/A".into()); - let time = if let Ok(ticks_per_sec) = u32::try_from(procfs::ticks_per_second()) { + let time = if let Ok(ticks_per_sec) = u32::try_from(rustix::param::clock_ticks_per_second()) { if ticks_per_sec == 0 { Duration::ZERO } else { @@ -221,11 +237,8 @@ fn read_proc( total_read_bytes, total_write_bytes, process_state, - uid: Some(uid), - user: user_table - .get_uid_to_username_mapping(uid) - .map(Into::into) - .unwrap_or_else(|_| "N/A".into()), + uid, + user, time, }, new_process_times, @@ -242,6 +255,10 @@ pub(crate) struct ProcHarvestOptions { pub unnormalized_cpu: bool, } +fn is_str_numeric(s: &str) -> bool { + s.chars().all(|c| c.is_ascii_digit()) +} + pub(crate) fn get_process_data( sys: &System, prev_proc: PrevProc<'_>, pid_mapping: &mut HashMap, proc_harvest_options: ProcHarvestOptions, time_difference_in_secs: u64, total_memory: u64, @@ -275,32 +292,36 @@ pub(crate) fn get_process_data( let mut pids_to_clear: HashSet = pid_mapping.keys().cloned().collect(); - let process_vector: Vec = std::fs::read_dir("/proc")? - .filter_map(|dir| { - if let Ok(dir) = dir { - if let Ok(pid) = dir.file_name().to_string_lossy().trim().parse::() { - let Ok(process) = Process::new(pid) else { - return None; - }; - let prev_proc_details = pid_mapping.entry(pid).or_default(); - - if let Ok((process_harvest, new_process_times)) = read_proc( - prev_proc_details, - &process, - cpu_usage, - cpu_fraction, - use_current_cpu_total, - time_difference_in_secs, - total_memory, - user_table, - ) { - prev_proc_details.cpu_time = new_process_times; - prev_proc_details.total_read_bytes = process_harvest.total_read_bytes; - prev_proc_details.total_write_bytes = process_harvest.total_write_bytes; - - pids_to_clear.remove(&pid); - return Some(process_harvest); - } + let pids = fs::read_dir("/proc")?.flatten().filter_map(|dir| { + if is_str_numeric(dir.file_name().to_string_lossy().trim()) { + Some(dir.path()) + } else { + None + } + }); + + let process_vector: Vec = pids + .filter_map(|pid_path| { + if let Ok(process) = Process::from_path(pid_path) { + let pid = process.pid; + let prev_proc_details = pid_mapping.entry(pid).or_default(); + + if let Ok((process_harvest, new_process_times)) = read_proc( + prev_proc_details, + process, + cpu_usage, + cpu_fraction, + use_current_cpu_total, + time_difference_in_secs, + total_memory, + user_table, + ) { + prev_proc_details.cpu_time = new_process_times; + prev_proc_details.total_read_bytes = process_harvest.total_read_bytes; + prev_proc_details.total_write_bytes = process_harvest.total_write_bytes; + + pids_to_clear.remove(&pid); + return Some(process_harvest); } } diff --git a/src/app/data_harvester/processes/linux/process.rs b/src/app/data_harvester/processes/linux/process.rs new file mode 100644 index 00000000..8889c697 --- /dev/null +++ b/src/app/data_harvester/processes/linux/process.rs @@ -0,0 +1,291 @@ +//! Linux process code for getting process data via `/proc/`. +//! Based on the [procfs](https://github.com/eminence/procfs) crate. + +use std::{ + fs::File, + io::{self, BufRead, BufReader, Read}, + path::PathBuf, +}; + +use anyhow::anyhow; +use libc::uid_t; +use once_cell::sync::Lazy; +use rustix::{ + fd::OwnedFd, + fs::{Mode, OFlags}, + path::Arg, +}; + +use crate::Pid; + +static PAGESIZE: Lazy = Lazy::new(|| rustix::param::page_size() as u64); + +#[inline] +fn next_part<'a>(iter: &mut impl Iterator) -> Result<&'a str, io::Error> { + iter.next() + .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidData)) +} + +/// A wrapper around the data in `/proc//stat`. For documentation, see [here](https://man7.org/linux/man-pages/man5/proc.5.html). +/// +/// Note this does not necessarily get all fields, only the ones we use in bottom. +pub(crate) struct Stat { + /// The filename of the executable without parentheses. + pub comm: String, + + /// The current process state, represented by a char. + pub state: char, + + /// The parent process PID. + pub ppid: Pid, + + /// The amount of time this process has been scheduled in user mode in clock ticks. + pub utime: u64, + + /// The amount of time this process has been scheduled in kernel mode in clock ticks. + pub stime: u64, + + /// The resident set size, or the number of pages the process has in real memory. + pub rss: u64, +} + +impl Stat { + #[inline] + fn from_file(mut f: File, buffer: &mut String) -> anyhow::Result { + // Since this is just one line, we can read it all at once. However, since it might have non-utf8 characters, + // we can't just use read_to_string. + f.read_to_end(unsafe { buffer.as_mut_vec() })?; + + let line = buffer.to_string_lossy(); + let line = line.trim(); + + let (comm, rest) = { + let start_paren = line + .find('(') + .ok_or_else(|| anyhow!("start paren missing"))?; + let end_paren = line.find(')').ok_or_else(|| anyhow!("end paren missing"))?; + + ( + line[start_paren + 1..end_paren].to_string(), + &line[end_paren + 2..], + ) + }; + + let mut rest = rest.split(' '); + let state = next_part(&mut rest)? + .chars() + .next() + .ok_or_else(|| anyhow!("missing state"))?; + + let ppid: Pid = next_part(&mut rest)?.parse()?; + + // Skip 9 fields until utime (pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt). + let mut rest = rest.skip(9); + + let utime: u64 = next_part(&mut rest)?.parse()?; + let stime: u64 = next_part(&mut rest)?.parse()?; + + // Skip 8 fields until rss (cutime, cstime, priority, nice, num_threads, itrealvalue, starttime, vsize). + let mut rest = rest.skip(8); + + let rss: u64 = next_part(&mut rest)?.parse()?; + + Ok(Stat { + comm, + state, + ppid, + utime, + stime, + rss, + }) + } + + /// Returns the Resident Set Size in bytes. + #[inline] + pub fn rss_bytes(&self) -> u64 { + self.rss * *PAGESIZE + } +} + +/// A wrapper around the data in `/proc//io`. +/// +/// Note this does not necessarily get all fields, only the ones we use in bottom. +pub(crate) struct Io { + pub read_bytes: u64, + pub write_bytes: u64, +} + +impl Io { + #[inline] + fn from_file(f: File, buffer: &mut String) -> anyhow::Result { + const NUM_FIELDS: u16 = 0; // Make sure to update this if you want more fields! + enum Fields { + ReadBytes, + WriteBytes, + } + + let mut read_fields = 0; + let mut reader = BufReader::new(f); + + let mut read_bytes = 0; + let mut write_bytes = 0; + + // This saves us from doing a string allocation on each iteration compared to `lines()`. + while let Ok(bytes) = reader.read_line(buffer) { + if bytes > 0 { + if buffer.is_empty() { + // Empty, no need to clear. + continue; + } + + let mut parts = buffer.split_whitespace(); + + if let Some(field) = parts.next() { + let curr_field = match field { + "read_bytes:" => Fields::ReadBytes, + "write_bytes:" => Fields::WriteBytes, + _ => { + buffer.clear(); + continue; + } + }; + + if let Some(value) = parts.next() { + let value = value.parse::()?; + match curr_field { + Fields::ReadBytes => { + read_bytes = value; + read_fields += 1; + } + Fields::WriteBytes => { + write_bytes = value; + read_fields += 1; + } + } + } + } + + // Quick short circuit if we read all required fields. + if read_fields == NUM_FIELDS { + break; + } + + buffer.clear(); + } else { + break; + } + } + + Ok(Io { + read_bytes, + write_bytes, + }) + } +} + +/// A wrapper around a Linux process operations in `/proc/`. +/// +/// Core documentation based on [proc's manpages](https://man7.org/linux/man-pages/man5/proc.5.html). +pub(crate) struct Process { + pub pid: Pid, + pub uid: Option, + pub stat: Stat, + pub io: anyhow::Result, + pub cmdline: anyhow::Result>, +} + +#[inline] +fn reset(root: &mut PathBuf, buffer: &mut String) { + root.pop(); + buffer.clear(); +} + +impl Process { + /// Creates a new [`Process`] given a `/proc/` path. This may fail if the process + /// no longer exists or there are permissions issues. + /// + /// Note that this pre-allocates fields on **creation**! As such, some data might end + /// up "outdated" depending on when you call some of the methods. Therefore, this struct + /// is only useful for either fields that are unlikely to change, or are short-lived and + /// will be discarded quickly. + pub(crate) fn from_path(pid_path: PathBuf) -> anyhow::Result { + // TODO: Pass in a buffer vec/string to share? + + let fd = rustix::fs::openat( + rustix::fs::cwd(), + &pid_path, + OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + )?; + + let pid = pid_path + .as_path() + .components() + .last() + .and_then(|s| s.to_string_lossy().parse::().ok()) + .or_else(|| { + rustix::fs::readlinkat(rustix::fs::cwd(), &pid_path, vec![]) + .ok() + .and_then(|s| s.to_string_lossy().parse::().ok()) + }) + .ok_or_else(|| anyhow!("PID for {pid_path:?} was not found"))?; + + let uid = { + let metadata = rustix::fs::fstat(&fd); + match metadata { + Ok(md) => Some(md.st_uid), + Err(_) => None, + } + }; + + let mut root = pid_path; + let mut buffer = String::new(); + + // NB: Whenever you add a new stat, make sure to pop the root and clear the buffer! + let stat = + open_at(&mut root, "stat", &fd).and_then(|file| Stat::from_file(file, &mut buffer))?; + reset(&mut root, &mut buffer); + + let cmdline = cmdline(&mut root, &fd, &mut buffer); + reset(&mut root, &mut buffer); + + let io = open_at(&mut root, "io", &fd).and_then(|file| Io::from_file(file, &mut buffer)); + + Ok(Process { + pid, + uid, + stat, + io, + cmdline, + }) + } +} + +#[inline] +fn cmdline(root: &mut PathBuf, fd: &OwnedFd, buffer: &mut String) -> anyhow::Result> { + open_at(root, "cmdline", fd) + .map(|mut file| file.read_to_string(buffer)) + .map(|_| { + buffer + .split('\0') + .filter_map(|s| { + if !s.is_empty() { + Some(s.to_string()) + } else { + None + } + }) + .collect::>() + }) + .map_err(Into::into) +} + +/// Opens a path. Note that this function takes in a mutable root - this will mutate it to avoid allocations. You +/// probably will want to pop the most recent child after if you need to use the buffer again. +#[inline] +fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result { + root.push(child); + let new_fd = rustix::fs::openat(fd, &*root, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())?; + + Ok(File::from(new_fd)) +} diff --git a/src/utils/error.rs b/src/utils/error.rs index a0abff33..bf6a5b38 100644 --- a/src/utils/error.rs +++ b/src/utils/error.rs @@ -2,9 +2,6 @@ use std::{borrow::Cow, result}; use thiserror::Error; -#[cfg(target_os = "linux")] -use procfs::ProcError; - /// A type alias for handling errors related to Bottom. pub type Result = result::Result; @@ -37,10 +34,6 @@ pub enum BottomError { MinorError, #[error("Error casting integers {0}")] TryFromIntError(#[from] std::num::TryFromIntError), - /// An error to represent errors with procfs - #[cfg(target_os = "linux")] - #[error("Procfs error, {0}")] - ProcfsError(String), } impl From for BottomError { @@ -95,23 +88,3 @@ impl From for BottomError { BottomError::QueryError(format!("Regex error: {}", error.last().unwrap_or(&"")).into()) } } - -#[cfg(target_os = "linux")] -impl From for BottomError { - fn from(err: ProcError) -> Self { - match err { - ProcError::PermissionDenied(p) => { - BottomError::ProcfsError(format!("Permission denied for {:?}", p)) - } - ProcError::NotFound(p) => BottomError::ProcfsError(format!("{:?} not found", p)), - ProcError::Incomplete(p) => BottomError::ProcfsError(format!("{:?} incomplete", p)), - ProcError::Io(e, p) => { - BottomError::ProcfsError(format!("io error: {:?} for {:?}", e, p)) - } - ProcError::Other(s) => BottomError::ProcfsError(format!("Other procfs error: {}", s)), - ProcError::InternalError(e) => { - BottomError::ProcfsError(format!("procfs internal error: {:?}", e)) - } - } - } -} -- cgit v1.2.3