diff options
author | Thomas Otto <th1000s@posteo.net> | 2021-11-16 21:29:55 +0100 |
---|---|---|
committer | Dan Davison <dandavison7@gmail.com> | 2021-11-18 12:23:12 -0500 |
commit | 416563503d68dac69fa71c78ea8fd245fd34858c (patch) | |
tree | 9de9f98af05fb7148f5053a5f7f591d7e7832196 /src/utils/process.rs | |
parent | 90fd9c60493cc6f653e76277d67e1fce611bbb1d (diff) |
Make process utils more generic
Diffstat (limited to 'src/utils/process.rs')
-rw-r--r-- | src/utils/process.rs | 303 |
1 files changed, 173 insertions, 130 deletions
diff --git a/src/utils/process.rs b/src/utils/process.rs index 844f9f31..a9c11a37 100644 --- a/src/utils/process.rs +++ b/src/utils/process.rs @@ -1,32 +1,108 @@ use std::collections::{HashMap, HashSet}; use sysinfo::{Pid, Process, ProcessExt, SystemExt}; +// Return value of `extract_args(args: &[String]) -> ProcessArgs<T>` function which is +// passed to `calling_process_cmdline()`. #[derive(Debug, PartialEq)] -pub enum GitBlameExtension { - Some(String), - None, - NotGitBlame, +pub enum ProcessArgs<T> { + // A result has been successfully extracted from args. + Args(T), + // The extraction has failed. + ArgError, + // The process does not match, others may be inspected. + OtherProcess, } pub fn git_blame_filename_extension() -> Option<String> { + calling_process_cmdline(blame::guess_git_blame_filename_extension) +} + +mod blame { + use super::*; + + // Skip all arguments starting with '-' from `args_it`. Also skip all arguments listed in + // `skip_this_plus_parameter` plus their respective next argument. + // Keep all arguments once a '--' is encountered. + // (Note that some arguments work with and without '=': '--foo' 'bar' / '--foo=bar') + fn skip_uninteresting_args<'a, 'b, ArgsI, SkipI>( + mut args_it: ArgsI, + skip_this_plus_parameter: SkipI, + ) -> Vec<&'a str> + where + ArgsI: Iterator<Item = &'a str>, + SkipI: Iterator<Item = &'b str>, + { + let arg_follows_space: HashSet<&'b str> = skip_this_plus_parameter.into_iter().collect(); + + let mut result = Vec::new(); + loop { + match args_it.next() { + None => break result, + Some("--") => { + result.extend(args_it); + break result; + } + Some(arg) if arg_follows_space.contains(arg) => { + let _skip_parameter = args_it.next(); + } + Some(arg) if !arg.starts_with('-') => { + result.push(arg); + } + Some(_) => { /* skip: --these -and --also=this */ } + } + } + } + + pub fn guess_git_blame_filename_extension(args: &[String]) -> ProcessArgs<String> { + { + let mut it = args.iter(); + match (it.next(), it.next()) { + // git blame or git -C/-c etc. and then (maybe) blame + (Some(git), Some(blame)) + if git.contains("git") && (blame == "blame" || blame.starts_with('-')) => {} + _ => return ProcessArgs::OtherProcess, + } + } + + let args = args.iter().skip(2).map(|s| s.as_str()); + + // See git(1) and git-blame(1). Some arguments separate their parameter with space or '=', e.g. + // --date 2015 or --date=2015. + let git_blame_options_with_parameter = + "-C -c -L --since --ignore-rev --ignore-revs-file --contents --reverse --date"; + + match skip_uninteresting_args(args, git_blame_options_with_parameter.split(' ')) + .last() + .and_then(|&s| s.split('.').last()) + .map(str::to_owned) + { + Some(ext) => ProcessArgs::Args(ext), + None => ProcessArgs::ArgError, + } + } +} // mod blame + +fn calling_process_cmdline<F, T>(extract_args: F) -> Option<T> +where + F: Fn(&[String]) -> ProcessArgs<T>, +{ let mut info = sysinfo::System::new(); let my_pid = std::process::id() as Pid; // 1) Try the parent process. If delta is set as the pager in git, then git is the parent process. let parent = parent_process(&mut info, my_pid)?; - match guess_git_blame_filename_extension(parent.cmd()) { - GitBlameExtension::Some(ext) => return Some(ext), - GitBlameExtension::None => return None, + match extract_args(parent.cmd()) { + ProcessArgs::Args(ext) => return Some(ext), + ProcessArgs::ArgError => return None, // 2) The parent process was something else, this can happen if git output is piped into delta, e.g. // `git blame foo.txt | delta`. When the shell sets up the pipe it creates the two processes, the pids - // are usually consecutive, so check if the proceess with `my_pid - 1` matches. - GitBlameExtension::NotGitBlame => { + // are usually consecutive, so check if the process with `my_pid - 1` matches. + ProcessArgs::OtherProcess => { let sibling = naive_sibling_process(&mut info, my_pid); if let Some(proc) = sibling { - if let GitBlameExtension::Some(ext) = guess_git_blame_filename_extension(proc.cmd()) - { + if let ProcessArgs::Args(ext) = extract_args(proc.cmd()) { return Some(ext); } } @@ -35,12 +111,12 @@ pub fn git_blame_filename_extension() -> Option<String> { } /* - 3) Neither parent nor direct sibling were a match. - The most likely case is that the input program of the pipe wrote all its data and exited before delta - started, so no file extension can be retrieved. Same if the data was piped from an input file. + 3) Neither parent nor direct sibling were a match. + The most likely case is that the input program of the pipe wrote all its data and exited before delta + started, so no command line can be parsed. Same if the data was piped from an input file. - There might also be intermediary scripts in between or piped input with randomized pids, so check all - processes for the closest `git blame` in the process tree. + There might also be intermediary scripts in between or piped input with a gap in pids or (rarely) + randomized pids, so check all processes for the closest match in the process tree. 100 /usr/bin/some-terminal-emulator 124 \_ -shell @@ -50,8 +126,8 @@ pub fn git_blame_filename_extension() -> Option<String> { 304 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen 125 \_ -shell 800 | \_ /usr/bin/git blame src/main.rs - 400 | \_ delta - 200 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen + 200 | \_ delta + 400 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen 126 \_ -shell 501 | \_ /bin/sh /wrapper/for/git blame src/main.rs 555 | | \_ /usr/bin/git blame src/main.rs @@ -59,68 +135,7 @@ pub fn git_blame_filename_extension() -> Option<String> { 567 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen */ - find_sibling_process(&mut info, my_pid) -} - -// Skip all arguments starting with '-' from `args_it`. Also skip all arguments listed in -// `skip_this_plus_parameter` plus their respective next argument. -// Keep all arguments once a '--' is encountered. -// (Note that some an argument work with and without '=', e.g. '--foo' 'bar' and '--foo=bar') -fn skip_uninteresting_args<'a, 'b, ArgsI, SkipI>( - mut args_it: ArgsI, - skip_this_plus_parameter: SkipI, -) -> Vec<&'a str> -where - ArgsI: Iterator<Item = &'a str>, - SkipI: Iterator<Item = &'b str>, -{ - let arg_follows_space: HashSet<&'b str> = skip_this_plus_parameter.into_iter().collect(); - - let mut result = Vec::new(); - loop { - match args_it.next() { - None => break result, - Some("--") => { - result.extend(args_it); - break result; - } - Some(arg) if arg_follows_space.contains(arg) => { - let _skip_parameter = args_it.next(); - } - Some(arg) if !arg.starts_with('-') => { - result.push(arg); - } - Some(_) => { /* skip: --these -and --also=this */ } - } - } -} - -fn guess_git_blame_filename_extension(args: &[String]) -> GitBlameExtension { - { - let mut it = args.iter(); - match (it.next(), it.next()) { - // git blame or git -C/-c etc. and then (maybe) blame - (Some(git), Some(blame)) - if git.contains("git") && (blame == "blame" || blame.starts_with('-')) => {} - _ => return GitBlameExtension::NotGitBlame, - } - } - - let args = args.iter().skip(2).map(|s| s.as_str()); - - // See git(1) and git-blame(1). Some arguments separate their parameter with space or '=', e.g. - // --date=2015 or --date 2015. - let git_blame_options_with_parameter = - "-C -c -L --since --ignore-rev --ignore-revs-file --contents --reverse --date"; - - match skip_uninteresting_args(args, git_blame_options_with_parameter.split(' ')) - .last() - .and_then(|&s| s.split('.').last()) - .map(str::to_owned) - { - Some(ext) => GitBlameExtension::Some(ext), - None => GitBlameExtension::None, - } + find_sibling_process(&mut info, my_pid, extract_args) } fn parent_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<&Process> { @@ -137,19 +152,30 @@ fn naive_sibling_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<&Pro info.process(sibling_pid) } -fn iter_parents<F>(info: &sysinfo::System, pid: Pid, distance: usize, mut f: F) +// Walk up the process tree, calling `f` with the pid and the distance to `starting_pid`. +// Prerequisite: `info.refresh_processes()` has been called. +fn iter_parents<F>(info: &sysinfo::System, starting_pid: Pid, f: F) where F: FnMut(Pid, usize), { - if let Some(proc) = info.process(pid) { - if let Some(pid) = proc.parent() { - f(pid, distance); - iter_parents(info, pid, distance + 1, f) + fn inner_iter_parents<F>(info: &sysinfo::System, pid: Pid, mut f: F, distance: usize) + where + F: FnMut(Pid, usize), + { + if let Some(proc) = info.process(pid) { + if let Some(pid) = proc.parent() { + f(pid, distance); + inner_iter_parents(info, pid, f, distance + 1) + } } } + inner_iter_parents(info, starting_pid, f, 1) } -fn find_sibling_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<String> { +fn find_sibling_process<F, T>(info: &mut sysinfo::System, my_pid: Pid, extract_args: F) -> Option<T> +where + F: Fn(&[String]) -> ProcessArgs<T>, +{ info.refresh_processes(); let this_start_time = info.process(my_pid)?.start_time(); @@ -173,54 +199,54 @@ fn find_sibling_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<Strin */ let mut pid_distances = HashMap::<Pid, usize>::new(); - let mut collect_parent_pids = |pid: Pid, distance| { + let mut collect_parent_pids = |pid, distance| { pid_distances.insert(pid, distance); }; - iter_parents(info, my_pid, 1, &mut collect_parent_pids); + iter_parents(info, my_pid, &mut collect_parent_pids); let process_start_time_difference_less_than_3s = |a, b| (a as i64 - b as i64).abs() < 3; - let closest_git_blame_extension = info + let cmdline_of_closest_matching_process = info .processes() .iter() .filter(|(_, proc)| { process_start_time_difference_less_than_3s(this_start_time, proc.start_time()) }) - .filter_map( - |(pid, proc)| match guess_git_blame_filename_extension(proc.cmd()) { - GitBlameExtension::Some(args) => { - let mut length_of_process_chain = usize::MAX; - - let mut sum_distance = |pid: Pid, distance: usize| { - if length_of_process_chain == usize::MAX { - if let Some(distance_to_first_common_parent) = pid_distances.get(&pid) { - length_of_process_chain = - distance_to_first_common_parent + distance; - } + .filter_map(|(&pid, proc)| match extract_args(proc.cmd()) { + ProcessArgs::Args(args) => { + let mut length_of_process_chain = usize::MAX; + + let mut sum_distance = |pid, distance| { + if length_of_process_chain == usize::MAX { + if let Some(distance_to_first_common_parent) = pid_distances.get(&pid) { + length_of_process_chain = distance_to_first_common_parent + distance; } - }; - iter_parents(info, *pid, 1, &mut sum_distance); + } + }; + iter_parents(info, pid, &mut sum_distance); - Some((length_of_process_chain, args)) - } - _ => None, - }, - ) + Some((length_of_process_chain, args)) + } + _ => None, + }) .min_by_key(|(distance, _)| *distance) .map(|(_, ext)| ext); - closest_git_blame_extension + cmdline_of_closest_matching_process } #[cfg(test)] mod tests { + use super::blame::*; use super::*; + use itertools::Itertools; + #[test] - fn test_guess_filename_extension_from_args() { - use GitBlameExtension::None; - use GitBlameExtension::Some; + fn test_guess_git_blame_filename_extension() { + use ProcessArgs::ArgError; + use ProcessArgs::Args; fn make_string_vec(args: &[&str]) -> Vec<String> { args.iter().map(|&x| x.to_owned()).collect::<Vec<String>>() @@ -228,7 +254,7 @@ mod tests { let args = make_string_vec(&["git", "blame", "hello", "world.txt"]); assert_eq!( guess_git_blame_filename_extension(&args), - Some("txt".into()) + Args("txt".into()) ); let args = make_string_vec(&[ @@ -243,62 +269,79 @@ mod tests { ]); assert_eq!( guess_git_blame_filename_extension(&args), - Some("txt".into()) + Args("txt".into()) ); let args = make_string_vec(&["git", "blame", "-s", "-f", "--", "hello.txt"]); assert_eq!( guess_git_blame_filename_extension(&args), - Some("txt".into()) + Args("txt".into()) ); let args = make_string_vec(&["git", "blame", "--", "--not.an.argument"]); assert_eq!( guess_git_blame_filename_extension(&args), - Some("argument".into()) + Args("argument".into()) ); let args = make_string_vec(&["foo", "bar", "-a", "--123", "not.git"]); assert_eq!( guess_git_blame_filename_extension(&args), - GitBlameExtension::NotGitBlame + ProcessArgs::OtherProcess ); let args = make_string_vec(&["git", "blame", "--help.txt"]); - assert_eq!(guess_git_blame_filename_extension(&args), None); + assert_eq!(guess_git_blame_filename_extension(&args), ArgError); let args = make_string_vec(&["git", "-c", "a=b", "blame", "main.rs"]); - assert_eq!(guess_git_blame_filename_extension(&args), Some("rs".into())); + assert_eq!(guess_git_blame_filename_extension(&args), Args("rs".into())); let args = make_string_vec(&["git", "blame", "README"]); assert_eq!( guess_git_blame_filename_extension(&args), - Some("README".into()) + Args("README".into()) ); let args = make_string_vec(&["git", "blame", ""]); - assert_eq!(guess_git_blame_filename_extension(&args), Some("".into())); + assert_eq!(guess_git_blame_filename_extension(&args), Args("".into())); } #[test] - fn test_process_parent_cmd_args() { - // Github runs CI tests for arm under qemu where where sysinfo can not find the parent pid. + fn test_calling_process_cmdline() { + // Github runs CI tests for arm under qemu where where sysinfo can not find the parent processr. if std::env::vars().any(|(key, _)| key == "CROSS_RUNNER" || key == "QEMU_LD_PREFIX") { return; } let mut info = sysinfo::System::new(); - let my_pid = std::process::id() as Pid; + info.refresh_processes(); + let mut ppid_distance = Vec::new(); + + iter_parents(&info, std::process::id() as Pid, |pid, distance| { + ppid_distance.push(pid as i32); + ppid_distance.push(distance as i32) + }); + + assert!(ppid_distance[1] == 1); - let parent = parent_process(&mut info, my_pid); + fn find_calling_process(args: &[String], want: &[&str]) -> ProcessArgs<()> { + if args.iter().any(|have| want.iter().any(|want| want == have)) { + ProcessArgs::Args(()) + } else { + ProcessArgs::ArgError + } + } - assert!(parent.is_some()); + // Tests that caller is something like "cargo test" or "tarpaulin" + let find_test = |args: &[String]| find_calling_process(args, &["test", "tarpaulin"]); + assert_eq!(calling_process_cmdline(find_test), Some(())); - // Tests that caller is something like "cargo test" - assert!(parent - .unwrap() - .cmd() + let nonsense = ppid_distance .iter() - .any(|a| a == "test" || a == "tarpaulin")); + .map(|i| i.to_string()) + .join("Y40ii4RihK6lHiK4BDsGSx"); + + let find_nothing = |args: &[String]| find_calling_process(args, &[&nonsense]); + assert_eq!(calling_process_cmdline(find_nothing), None); } } |