diff options
author | Clement Tsang <34804052+ClementTsang@users.noreply.github.com> | 2021-05-15 18:57:02 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-05-15 21:57:02 -0400 |
commit | 6847f2ff0ce5827c0779a1dfcb9e6a8a657652b1 (patch) | |
tree | 2b957d6701f06720baf2cab836303400f340e6f2 /src/app/data_harvester/processes.rs | |
parent | 39c5ee991e8a02a72da398433c62f4a1b8c7acbb (diff) |
refactor: split up data collection by OS (#482)
Refactor to split up data collection by OS and/or the backing library. The goal is to make it easier to work with and add new OS support, as opposed to how it was prior where we stored OS-independent implementations all in the same file.
Diffstat (limited to 'src/app/data_harvester/processes.rs')
-rw-r--r-- | src/app/data_harvester/processes.rs | 578 |
1 files changed, 0 insertions, 578 deletions
diff --git a/src/app/data_harvester/processes.rs b/src/app/data_harvester/processes.rs deleted file mode 100644 index 94f6fb62..00000000 --- a/src/app/data_harvester/processes.rs +++ /dev/null @@ -1,578 +0,0 @@ -use crate::Pid; - -use sysinfo::ProcessStatus; - -#[cfg(target_family = "unix")] -use crate::utils::error; - -#[cfg(target_os = "linux")] -use procfs::process::{Process, Stat}; - -#[cfg(target_os = "linux")] -use crate::utils::error::BottomError; - -#[cfg(target_os = "linux")] -use fxhash::{FxHashMap, FxHashSet}; - -#[cfg(not(target_os = "linux"))] -use sysinfo::{ProcessExt, ProcessorExt, System, SystemExt}; - -/// Maximum character length of a /proc/<PID>/stat process name. -/// If it's equal or greater, then we instead refer to the command for the name. -#[cfg(target_os = "linux")] -const MAX_STAT_NAME_LEN: usize = 15; - -// TODO: Add value so we know if it's sorted ascending or descending by default? -#[derive(Clone, PartialEq, Eq, Hash, Debug)] -pub enum ProcessSorting { - CpuPercent, - Mem, - MemPercent, - Pid, - ProcessName, - Command, - ReadPerSecond, - WritePerSecond, - TotalRead, - TotalWrite, - State, - User, - Count, -} - -impl std::fmt::Display for ProcessSorting { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - match &self { - ProcessSorting::CpuPercent => "CPU%", - ProcessSorting::MemPercent => "Mem%", - ProcessSorting::Mem => "Mem", - ProcessSorting::ReadPerSecond => "R/s", - ProcessSorting::WritePerSecond => "W/s", - ProcessSorting::TotalRead => "T.Read", - ProcessSorting::TotalWrite => "T.Write", - ProcessSorting::State => "State", - ProcessSorting::ProcessName => "Name", - ProcessSorting::Command => "Command", - ProcessSorting::Pid => "PID", - ProcessSorting::Count => "Count", - ProcessSorting::User => "User", - } - ) - } -} - -impl Default for ProcessSorting { - fn default() -> Self { - ProcessSorting::CpuPercent - } -} - -#[derive(Debug, Clone, Default)] -pub struct ProcessHarvest { - pub pid: Pid, - pub parent_pid: Option<Pid>, // Remember, parent_pid 0 is root... - pub cpu_usage_percent: f64, - pub mem_usage_percent: f64, - pub mem_usage_bytes: u64, - // pub rss_kb: u64, - // pub virt_kb: u64, - pub name: String, - pub command: String, - pub read_bytes_per_sec: u64, - pub write_bytes_per_sec: u64, - pub total_read_bytes: u64, - pub total_write_bytes: u64, - pub process_state: String, - pub process_state_char: char, - - /// This is the *effective* user ID. - #[cfg(target_family = "unix")] - pub uid: Option<libc::uid_t>, -} - -#[cfg(target_os = "linux")] -#[derive(Debug, Clone)] -pub struct PrevProcDetails { - pub total_read_bytes: u64, - pub total_write_bytes: u64, - pub cpu_time: u64, - pub process: Process, -} - -#[cfg(target_os = "linux")] -impl PrevProcDetails { - fn new(pid: Pid) -> error::Result<Self> { - Ok(Self { - total_read_bytes: 0, - total_write_bytes: 0, - cpu_time: 0, - process: Process::new(pid)?, - }) - } -} - -#[cfg(target_family = "unix")] -#[derive(Debug, Default)] -pub struct UserTable { - pub uid_user_mapping: std::collections::HashMap<libc::uid_t, String>, -} - -#[cfg(target_family = "unix")] -impl UserTable { - pub fn get_uid_to_username_mapping(&mut self, uid: libc::uid_t) -> error::Result<String> { - if let Some(user) = self.uid_user_mapping.get(&uid) { - Ok(user.clone()) - } else { - // SAFETY: getpwuid returns a null pointer if no passwd entry is found for the uid - let passwd = unsafe { libc::getpwuid(uid) }; - - if passwd.is_null() { - return Err(error::BottomError::QueryError("Missing passwd".into())); - } - - let username = unsafe { std::ffi::CStr::from_ptr((*passwd).pw_name) } - .to_str()? - .to_string(); - self.uid_user_mapping.insert(uid, username.clone()); - - Ok(username) - } - } -} - -#[cfg(target_os = "linux")] -fn cpu_usage_calculation( - prev_idle: &mut f64, prev_non_idle: &mut f64, -) -> error::Result<(f64, f64)> { - use std::io::prelude::*; - use std::io::BufReader; - - // From SO answer: https://stackoverflow.com/a/23376195 - - let mut reader = BufReader::new(std::fs::File::open("/proc/stat")?); - let mut first_line = String::new(); - reader.read_line(&mut first_line)?; - - let val = first_line.split_whitespace().collect::<Vec<&str>>(); - - // SC in case that the parsing will fail due to length: - if val.len() <= 10 { - return Err(error::BottomError::InvalidIo(format!( - "CPU parsing will fail due to too short of a return value; saw {} values, expected 10 values.", - val.len() - ))); - } - - let user: f64 = val[1].parse::<_>().unwrap_or(0_f64); - let nice: f64 = val[2].parse::<_>().unwrap_or(0_f64); - let system: f64 = val[3].parse::<_>().unwrap_or(0_f64); - let idle: f64 = val[4].parse::<_>().unwrap_or(0_f64); - let iowait: f64 = val[5].parse::<_>().unwrap_or(0_f64); - let irq: f64 = val[6].parse::<_>().unwrap_or(0_f64); - let softirq: f64 = val[7].parse::<_>().unwrap_or(0_f64); - let steal: f64 = val[8].parse::<_>().unwrap_or(0_f64); - let guest: f64 = val[9].parse::<_>().unwrap_or(0_f64); - - let idle = idle + iowait; - let non_idle = user + nice + system + irq + softirq + steal + guest; - - let total = idle + non_idle; - let prev_total = *prev_idle + *prev_non_idle; - - let total_delta: f64 = total - prev_total; - let idle_delta: f64 = idle - *prev_idle; - - *prev_idle = idle; - *prev_non_idle = non_idle; - - let result = if total_delta - idle_delta != 0_f64 { - total_delta - idle_delta - } else { - 1_f64 - }; - - let cpu_percentage = if total_delta != 0_f64 { - result / total_delta - } else { - 0_f64 - }; - - Ok((result, cpu_percentage)) -} - -/// Returns the usage and a new set of process times. Note: cpu_fraction should be represented WITHOUT the x100 factor! -#[cfg(target_os = "linux")] -fn get_linux_cpu_usage( - stat: &Stat, cpu_usage: f64, cpu_fraction: f64, prev_proc_times: u64, - use_current_cpu_total: bool, -) -> (f64, u64) { - // Based heavily on https://stackoverflow.com/a/23376195 and https://stackoverflow.com/a/1424556 - let new_proc_times = stat.utime + stat.stime; - let diff = (new_proc_times - prev_proc_times) as f64; // I HATE that it's done like this but there isn't a try_from for u64 -> f64... we can accept a bit of loss in the worst case though - - if cpu_usage == 0.0 { - (0.0, new_proc_times) - } else if use_current_cpu_total { - (diff / cpu_usage * 100_f64, new_proc_times) - } else { - (diff / cpu_usage * 100_f64 * cpu_fraction, new_proc_times) - } -} - -#[cfg(target_os = "macos")] -fn get_macos_process_cpu_usage( - pids: &[i32], -) -> std::io::Result<std::collections::HashMap<i32, f64>> { - use itertools::Itertools; - let output = std::process::Command::new("ps") - .args(&["-o", "pid=,pcpu=", "-p"]) - .arg( - // Has to look like this since otherwise, it you hit a `unstable_name_collisions` warning. - Itertools::intersperse(pids.iter().map(i32::to_string), ",".to_string()) - .collect::<String>(), - ) - .output()?; - let mut result = std::collections::HashMap::new(); - String::from_utf8_lossy(&output.stdout) - .split_whitespace() - .chunks(2) - .into_iter() - .for_each(|chunk| { - let chunk: Vec<&str> = chunk.collect(); - if chunk.len() != 2 { - panic!("Unexpected `ps` output"); - } - let pid = chunk[0].parse(); - let usage = chunk[1].parse(); - if let (Ok(pid), Ok(usage)) = (pid, usage) { - result.insert(pid, usage); - } - }); - Ok(result) -} - -#[allow(clippy::too_many_arguments)] -#[cfg(target_os = "linux")] -fn read_proc( - prev_proc: &PrevProcDetails, stat: &Stat, cpu_usage: f64, cpu_fraction: f64, - use_current_cpu_total: bool, time_difference_in_secs: u64, mem_total_kb: u64, -) -> error::Result<(ProcessHarvest, u64)> { - use std::convert::TryFrom; - - let process = &prev_proc.process; - - let (command, name) = { - let truncated_name = stat.comm.as_str(); - if let Ok(cmdline) = process.cmdline() { - if cmdline.is_empty() { - (format!("[{}]", truncated_name), truncated_name.to_string()) - } else { - ( - cmdline.join(" "), - if truncated_name.len() >= MAX_STAT_NAME_LEN { - if let Some(first_part) = cmdline.first() { - // We're only interested in the executable part... not the file path. - // That's for command. - first_part - .rsplit_once('/') - .map(|(_prefix, suffix)| suffix) - .unwrap_or(&truncated_name) - .to_string() - } else { - truncated_name.to_string() - } - } else { - truncated_name.to_string() - }, - ) - } - } else { - (truncated_name.to_string(), truncated_name.to_string()) - } - }; - - let process_state_char = stat.state; - let process_state = ProcessStatus::from(process_state_char).to_string(); - let (cpu_usage_percent, new_process_times) = get_linux_cpu_usage( - &stat, - cpu_usage, - cpu_fraction, - prev_proc.cpu_time, - use_current_cpu_total, - ); - let parent_pid = Some(stat.ppid); - let mem_usage_bytes = u64::try_from(stat.rss_bytes()).unwrap_or(0); - let mem_usage_kb = mem_usage_bytes / 1024; - let mem_usage_percent = mem_usage_kb as f64 / mem_total_kb as f64 * 100.0; - - // This can fail if permission is denied! - - let (total_read_bytes, total_write_bytes, read_bytes_per_sec, write_bytes_per_sec) = - if let Ok(io) = process.io() { - let total_read_bytes = io.read_bytes; - let total_write_bytes = io.write_bytes; - - let read_bytes_per_sec = if time_difference_in_secs == 0 { - 0 - } else { - total_read_bytes.saturating_sub(prev_proc.total_read_bytes) - / time_difference_in_secs - }; - let write_bytes_per_sec = if time_difference_in_secs == 0 { - 0 - } else { - total_write_bytes.saturating_sub(prev_proc.total_write_bytes) - / time_difference_in_secs - }; - - ( - total_read_bytes, - total_write_bytes, - read_bytes_per_sec, - write_bytes_per_sec, - ) - } else { - (0, 0, 0, 0) - }; - - let uid = Some(process.owner); - - Ok(( - ProcessHarvest { - pid: process.pid, - parent_pid, - cpu_usage_percent, - mem_usage_percent, - mem_usage_bytes, - name, - command, - read_bytes_per_sec, - write_bytes_per_sec, - total_read_bytes, - total_write_bytes, - process_state, - process_state_char, - uid, - }, - new_process_times, - )) -} - -#[cfg(target_os = "linux")] -pub fn get_process_data( - prev_idle: &mut f64, prev_non_idle: &mut f64, - pid_mapping: &mut FxHashMap<Pid, PrevProcDetails>, use_current_cpu_total: bool, - time_difference_in_secs: u64, mem_total_kb: u64, -) -> crate::utils::error::Result<Vec<ProcessHarvest>> { - // TODO: [PROC THREADS] Add threads - - if let Ok((cpu_usage, cpu_fraction)) = cpu_usage_calculation(prev_idle, prev_non_idle) { - let mut pids_to_clear: FxHashSet<Pid> = pid_mapping.keys().cloned().collect(); - - let process_vector: Vec<ProcessHarvest> = std::fs::read_dir("/proc")? - .filter_map(|dir| { - if let Ok(dir) = dir { - if let Ok(pid) = dir.file_name().to_string_lossy().trim().parse::<Pid>() { - let mut fresh = false; - if !pid_mapping.contains_key(&pid) { - if let Ok(ppd) = PrevProcDetails::new(pid) { - pid_mapping.insert(pid, ppd); - fresh = true; - } else { - // Bail early. - return None; - } - }; - - if let Some(prev_proc_details) = pid_mapping.get_mut(&pid) { - let stat; - let stat_live; - if fresh { - stat = &prev_proc_details.process.stat; - } else if let Ok(s) = prev_proc_details.process.stat() { - stat_live = s; - stat = &stat_live; - } else { - // Bail early. - return None; - } - - if let Ok((process_harvest, new_process_times)) = read_proc( - &prev_proc_details, - stat, - cpu_usage, - cpu_fraction, - use_current_cpu_total, - time_difference_in_secs, - mem_total_kb, - ) { - prev_proc_details.cpu_time = new_process_times; - prev_proc_details.total_read_bytes = - process_harvest.total_read_bytes; - prev_proc_details.total_write_bytes = - process_harvest.total_write_bytes; - - pids_to_clear.remove(&pid); - return Some(process_harvest); - } - } - } - } - - None - }) - .collect(); - - pids_to_clear.iter().for_each(|pid| { - pid_mapping.remove(pid); - }); - - Ok(process_vector) - } else { - Err(BottomError::GenericError( - "Could not calculate CPU usage.".to_string(), - )) - } -} - -#[cfg(not(target_os = "linux"))] -pub fn get_process_data( - sys: &System, use_current_cpu_total: bool, mem_total_kb: u64, -) -> crate::utils::error::Result<Vec<ProcessHarvest>> { - let mut process_vector: Vec<ProcessHarvest> = Vec::new(); - let process_hashmap = sys.get_processes(); - let cpu_usage = sys.get_global_processor_info().get_cpu_usage() as f64 / 100.0; - let num_cpus = sys.get_processors().len() as f64; - for process_val in process_hashmap.values() { - let name = if process_val.name().is_empty() { - let process_cmd = process_val.cmd(); - if process_cmd.len() > 1 { - process_cmd[0].clone() - } else { - let process_exe = process_val.exe().file_stem(); - if let Some(exe) = process_exe { - let process_exe_opt = exe.to_str(); - if let Some(exe_name) = process_exe_opt { - exe_name.to_string() - } else { - "".to_string() - } - } else { - "".to_string() - } - } - } else { - process_val.name().to_string() - }; - let command = { - let command = process_val.cmd().join(" "); - if command.is_empty() { - name.to_string() - } else { - command - } - }; - - let pcu = if cfg!(target_os = "windows") || num_cpus == 0.0 { - process_val.cpu_usage() as f64 - } else { - process_val.cpu_usage() as f64 / num_cpus - }; - let process_cpu_usage = if use_current_cpu_total && cpu_usage > 0.0 { - pcu / cpu_usage - } else { - pcu - }; - - let disk_usage = process_val.disk_usage(); - #[cfg(target_os = "macos")] - { - process_vector.push(ProcessHarvest { - pid: process_val.pid(), - parent_pid: process_val.parent(), - name, - command, - mem_usage_percent: if mem_total_kb > 0 { - process_val.memory() as f64 * 100.0 / mem_total_kb as f64 - } else { - 0.0 - }, - mem_usage_bytes: process_val.memory() * 1024, - cpu_usage_percent: process_cpu_usage, - read_bytes_per_sec: disk_usage.read_bytes, - write_bytes_per_sec: disk_usage.written_bytes, - total_read_bytes: disk_usage.total_read_bytes, - total_write_bytes: disk_usage.total_written_bytes, - process_state: process_val.status().to_string(), - process_state_char: convert_process_status_to_char(process_val.status()), - uid: Some(process_val.uid), - }); - } - #[cfg(not(target_os = "macos"))] - { - process_vector.push(ProcessHarvest { - pid: process_val.pid(), - parent_pid: process_val.parent(), - name, - command, - mem_usage_percent: if mem_total_kb > 0 { - process_val.memory() as f64 * 100.0 / mem_total_kb as f64 - } else { - 0.0 - }, - mem_usage_bytes: process_val.memory() * 1024, - cpu_usage_percent: process_cpu_usage, - read_bytes_per_sec: disk_usage.read_bytes, - write_bytes_per_sec: disk_usage.written_bytes, - total_read_bytes: disk_usage.total_read_bytes, - total_write_bytes: disk_usage.total_written_bytes, - process_state: process_val.status().to_string(), - process_state_char: convert_process_status_to_char(process_val.status()), - }); - } - } - - #[cfg(target_os = "macos")] - { - let unknown_state = ProcessStatus::Unknown(0).to_string(); - let cpu_usage_unknown_pids: Vec<i32> = process_vector - .iter() - .filter(|process| process.process_state == unknown_state) - .map(|process| process.pid) - .collect(); - let cpu_usages = get_macos_process_cpu_usage(&cpu_usage_unknown_pids)?; - for process in &mut process_vector { - if cpu_usages.contains_key(&process.pid) { - process.cpu_usage_percent = if num_cpus == 0.0 { - *cpu_usages.get(&process.pid).unwrap() - } else { - *cpu_usages.get(&process.pid).unwrap() / num_cpus - }; - } - } - } - - Ok(process_vector) -} - -#[allow(unused_variables)] -#[cfg(not(target_os = "linux"))] -fn convert_process_status_to_char(status: ProcessStatus) -> char { - #[cfg(target_os = "macos")] - { - match status { - ProcessStatus::Run => 'R', - ProcessStatus::Sleep => 'S', - ProcessStatus::Idle => 'D', - ProcessStatus::Zombie => 'Z', - _ => '?', - } - } - #[cfg(not(target_os = "macos"))] - { - 'R' - } -} |