summaryrefslogtreecommitdiffstats
path: root/src/app/data_harvester/processes/linux/process.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/app/data_harvester/processes/linux/process.rs')
-rw-r--r--src/app/data_harvester/processes/linux/process.rs276
1 files changed, 276 insertions, 0 deletions
diff --git a/src/app/data_harvester/processes/linux/process.rs b/src/app/data_harvester/processes/linux/process.rs
new file mode 100644
index 00000000..f2fc6327
--- /dev/null
+++ b/src/app/data_harvester/processes/linux/process.rs
@@ -0,0 +1,276 @@
+//! Linux process code for getting process data via `/proc/`.
+//! Based on the [procfs](https://github.com/eminence/procfs) crate.
+
+use std::{
+ fs::File,
+ io::{self, BufRead, BufReader, Read},
+ path::PathBuf,
+};
+
+use anyhow::anyhow;
+use libc::uid_t;
+use once_cell::sync::Lazy;
+use rustix::{
+ fd::OwnedFd,
+ fs::{Mode, OFlags},
+ path::Arg,
+};
+
+use crate::Pid;
+
+static PAGESIZE: Lazy<u64> = Lazy::new(|| rustix::param::page_size() as u64);
+
+fn next_part<'a>(iter: &mut impl Iterator<Item = &'a str>) -> Result<&'a str, io::Error> {
+ iter.next()
+ .ok_or_else(|| io::Error::from(io::ErrorKind::InvalidData))
+}
+
+/// A wrapper around the data in `/proc/<PID>/stat`. For documentation, see [here](https://man7.org/linux/man-pages/man5/proc.5.html).
+///
+/// Note this does not necessarily get all fields, only the ones we use in bottom.
+pub(crate) struct Stat {
+ /// The filename of the executable without parentheses.
+ pub comm: String,
+
+ /// The current process state, represented by a char.
+ pub state: char,
+
+ /// The parent process PID.
+ pub ppid: Pid,
+
+ /// The amount of time this process has been scheduled in user mode in clock ticks.
+ pub utime: u64,
+
+ /// The amount of time this process has been scheduled in kernel mode in clock ticks.
+ pub stime: u64,
+
+ /// The resident set size, or the number of pages the process has in real memory.
+ pub rss: u64,
+}
+
+impl Stat {
+ fn from_file(mut f: File) -> anyhow::Result<Stat> {
+ // Since this is just one line, we can read it all at once. However, since it might have non-utf8 characters,
+ // we can't just use read_to_string.
+ let mut buffer = Vec::with_capacity(500);
+ f.read_to_end(&mut buffer)?;
+
+ let line = buffer.to_string_lossy();
+ let line = line.trim();
+
+ let (comm, rest) = {
+ let start_paren = line
+ .find('(')
+ .ok_or_else(|| anyhow!("start paren missing"))?;
+ let end_paren = line.find(')').ok_or_else(|| anyhow!("end paren missing"))?;
+
+ (
+ line[start_paren + 1..end_paren].to_string(),
+ &line[end_paren + 2..],
+ )
+ };
+
+ let mut rest = rest.split(' ');
+ let state = next_part(&mut rest)?
+ .chars()
+ .next()
+ .ok_or_else(|| anyhow!("missing state"))?;
+
+ let ppid: Pid = next_part(&mut rest)?.parse()?;
+
+ // Skip 9 fields until utime (pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt).
+ let mut rest = rest.skip(9);
+
+ let utime: u64 = next_part(&mut rest)?.parse()?;
+ let stime: u64 = next_part(&mut rest)?.parse()?;
+
+ // Skip 8 fields until rss (cutime, cstime, priority, nice, num_threads, itrealvalue, starttime, vsize).
+ let mut rest = rest.skip(8);
+
+ let rss: u64 = next_part(&mut rest)?.parse()?;
+
+ Ok(Stat {
+ comm,
+ state,
+ ppid,
+ utime,
+ stime,
+ rss,
+ })
+ }
+
+ /// Returns the Resident Set Size in bytes.
+ pub fn rss_bytes(&self) -> u64 {
+ self.rss * *PAGESIZE
+ }
+}
+
+/// A wrapper around the data in `/proc/<PID>/io`.
+///
+/// Note this does not necessarily get all fields, only the ones we use in bottom.
+pub(crate) struct Io {
+ pub read_bytes: u64,
+ pub write_bytes: u64,
+}
+
+impl Io {
+ fn from_file(f: File) -> anyhow::Result<Io> {
+ const NUM_FIELDS: u16 = 0; // Make sure to update this if you want more fields!
+ enum Fields {
+ ReadBytes,
+ WriteBytes,
+ }
+
+ let mut read_fields = 0;
+ let mut line = String::new();
+ let mut reader = BufReader::new(f);
+
+ let mut read_bytes = 0;
+ let mut write_bytes = 0;
+
+ // This saves us from doing a string allocation on each iteration compared to `lines()`.
+ while let Ok(bytes) = reader.read_line(&mut line) {
+ if bytes > 0 {
+ if line.is_empty() {
+ // Empty, no need to clear.
+ continue;
+ }
+
+ let mut parts = line.split_whitespace();
+
+ if let Some(field) = parts.next() {
+ let curr_field = match field {
+ "read_bytes:" => Fields::ReadBytes,
+ "write_bytes:" => Fields::WriteBytes,
+ _ => {
+ line.clear();
+ continue;
+ }
+ };
+
+ if let Some(value) = parts.next() {
+ let value = value.parse::<u64>()?;
+ match curr_field {
+ Fields::ReadBytes => {
+ read_bytes = value;
+ read_fields += 1;
+ }
+ Fields::WriteBytes => {
+ write_bytes = value;
+ read_fields += 1;
+ }
+ }
+ }
+ }
+
+ // Quick short circuit if we read all required fields.
+ if read_fields == NUM_FIELDS {
+ break;
+ }
+
+ line.clear();
+ } else {
+ break;
+ }
+ }
+
+ Ok(Io {
+ read_bytes,
+ write_bytes,
+ })
+ }
+}
+
+/// A wrapper around a Linux process operations in `/proc/<PID>`.
+///
+/// Core documentation based on [proc's manpages](https://man7.org/linux/man-pages/man5/proc.5.html).
+pub(crate) struct Process {
+ pub pid: Pid,
+ pub uid: Option<uid_t>,
+ pub stat: Stat,
+ pub io: anyhow::Result<Io>,
+ pub cmdline: anyhow::Result<Vec<String>>,
+}
+
+impl Process {
+ /// Creates a new [`Process`] given a `/proc/<PID>` path. This may fail if the process
+ /// no longer exists or there are permissions issues.
+ ///
+ /// Note that this pre-allocates fields on **creation**! As such, some data might end
+ /// up "outdated" depending on when you call some of the methods. Therefore, this struct
+ /// is only useful for either fields that are unlikely to change, or are short-lived and
+ /// will be discarded quickly.
+ pub(crate) fn from_path(pid_path: PathBuf) -> anyhow::Result<Process> {
+ // TODO: Pass in a buffer vec/string to share?
+
+ let fd = rustix::fs::openat(
+ rustix::fs::cwd(),
+ &pid_path,
+ OFlags::PATH | OFlags::DIRECTORY | OFlags::CLOEXEC,
+ Mode::empty(),
+ )?;
+
+ let pid = pid_path
+ .as_path()
+ .components()
+ .last()
+ .and_then(|s| s.to_string_lossy().parse::<Pid>().ok())
+ .or_else(|| {
+ rustix::fs::readlinkat(rustix::fs::cwd(), &pid_path, vec![])
+ .ok()
+ .and_then(|s| s.to_string_lossy().parse::<Pid>().ok())
+ })
+ .ok_or_else(|| anyhow!("PID for {pid_path:?} was not found"))?;
+
+ let uid = {
+ let metadata = rustix::fs::fstat(&fd);
+ match metadata {
+ Ok(md) => Some(md.st_uid),
+ Err(_) => None,
+ }
+ };
+
+ let mut root = pid_path.clone();
+ let cmdline = cmdline(&mut root, &fd);
+ root.pop();
+ let stat = open_at(&mut root, "stat", &fd).and_then(Stat::from_file)?;
+ root.pop();
+ let io = open_at(&mut root, "io", &fd).and_then(Io::from_file);
+
+ Ok(Process {
+ pid,
+ uid,
+ stat,
+ io,
+ cmdline,
+ })
+ }
+}
+
+fn cmdline(root: &mut PathBuf, fd: &OwnedFd) -> anyhow::Result<Vec<String>> {
+ let mut buf = String::new();
+ open_at(root, "cmdline", fd)
+ .map(|mut file| file.read_to_string(&mut buf))
+ .map(|_| {
+ buf.split('\0')
+ .filter_map(|s| {
+ if !s.is_empty() {
+ Some(s.to_string())
+ } else {
+ None
+ }
+ })
+ .collect::<Vec<_>>()
+ })
+ .map_err(Into::into)
+}
+
+/// Opens a path. Note that this function takes in a mutable root - this will mutate it to avoid allocations. You
+/// probably will want to pop the most recent child after if you need to use the buffer again.
+#[inline]
+fn open_at(root: &mut PathBuf, child: &str, fd: &OwnedFd) -> anyhow::Result<File> {
+ root.push(child);
+ let new_fd = rustix::fs::openat(&fd, &*root, OFlags::RDONLY | OFlags::CLOEXEC, Mode::empty())?;
+
+ Ok(File::from(new_fd))
+}