From 929345f3136e04387531cc87c3627ead22217692 Mon Sep 17 00:00:00 2001 From: Thomas Otto Date: Mon, 15 Nov 2021 22:59:21 +0100 Subject: Util: check parent process for `git blame` filename (#770) Usecase: With delta used as a pager the command `git blame main.rs` calls delta, but the piped data does not mention the filename. Instead try to get the filename extension from the parent `git` process. If that fails try the direct sibling or all other `git blame` processes. --- Cargo.lock | 37 ++++++++ Cargo.toml | 7 ++ src/main.rs | 1 + src/utils.rs | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 345 insertions(+) create mode 100644 src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index e1a845d3..65955aeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -179,6 +179,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + [[package]] name = "crc32fast" version = "1.2.1" @@ -239,6 +245,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + [[package]] name = "either" version = "1.6.1" @@ -334,6 +346,7 @@ dependencies = [ "smol_str", "structopt", "syntect", + "sysinfo", "unicode-segmentation", "unicode-width", "vte", @@ -565,6 +578,15 @@ dependencies = [ "memoffset", ] +[[package]] +name = "ntapi" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f6bb902e437b6d86e03cce10a7e2af662292c5dfef23b65899ea3ac9354ad44" +dependencies = [ + "winapi", +] + [[package]] name = "num-integer" version = "0.1.44" @@ -868,6 +890,21 @@ dependencies = [ "yaml-rust", ] +[[package]] +name = "sysinfo" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e7de153d0438a648bb71e06e300e54fc641685e96af96d49b843f43172d341c" +dependencies = [ + "cfg-if 1.0.0", + "core-foundation-sys", + "doc-comment", + "libc", + "ntapi", + "once_cell", + "winapi", +] + [[package]] name = "termcolor" version = "1.1.2" diff --git a/Cargo.toml b/Cargo.toml index b58009c0..1f0d7c7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,13 @@ version = "4.6.0" default-features = false features = ["parsing", "assets", "yaml-load", "dump-load", "regex-onig"] +[dependencies.sysinfo] +# 0.20.* requires rustc 1.54 +version = "0.19.2" +# no default features to disable the use of threads +default-features = false +features = [] + [dependencies.error-chain] version = "0.12.4" default-features = false diff --git a/src/main.rs b/src/main.rs index 0955f90d..64395de1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,6 +22,7 @@ mod options; mod paint; mod parse_style; mod style; +mod utils; mod wrapping; mod subcommands; diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 00000000..6f997d93 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,300 @@ +use std::collections::{HashMap, HashSet}; +use sysinfo::{Pid, Process, ProcessExt, SystemExt}; + +#[derive(Debug, PartialEq)] +pub enum GitBlameExtension { + Some(String), + None, + NotGitBlame, +} + +pub fn git_blame_filename_extension() -> Option { + let mut info = sysinfo::System::new(); + let my_pid = std::process::id() as Pid; + + // 1) Try the parent process. If delta is set as the pager in git, then git is the parent process. + let parent = parent_process(&mut info, my_pid)?; + + match guess_git_blame_filename_extension(parent.cmd()) { + GitBlameExtension::Some(ext) => return Some(ext), + GitBlameExtension::None => return None, + + // 2) The parent process was something else, this can happen if git output is piped into delta, e.g. + // `git blame foo.txt | delta`. When the shell sets up the pipe it creates the two processes, the pids + // are usually consecutive, so check if the proceess with `my_pid - 1` matches. + GitBlameExtension::NotGitBlame => { + let sibling = naive_sibling_process(&mut info, my_pid); + if let Some(proc) = sibling { + if let GitBlameExtension::Some(ext) = guess_git_blame_filename_extension(proc.cmd()) + { + return Some(ext); + } + } + // else try the fallback + } + } + + /* + 3) Neither parent nor direct sibling were a match. + The most likely case is that the input program of the pipe wrote all its data and exited before delta + started, so no file extension can be retrieved. Same if the data was piped from an input file. + + There might also be intermediary scripts in between or piped input with randomized pids, so check all + processes for the closest `git blame` in the process tree. + + 100 /usr/bin/some-terminal-emulator + 124 \_ -shell + 301 | \_ /usr/bin/git blame src/main.rs + 302 | \_ wraps_delta.sh + 303 | \_ delta + 304 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen + 125 \_ -shell + 800 | \_ /usr/bin/git blame src/main.rs + 400 | \_ delta + 200 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen + 126 \_ -shell + 501 | \_ /bin/sh /wrapper/for/git blame src/main.rs + 555 | | \_ /usr/bin/git blame src/main.rs + 502 | \_ delta + 567 | \_ less --RAW-CONTROL-CHARS --quit-if-one-screen + + */ + find_sibling_process(&mut info, my_pid) +} + +// Skip all arguments starting with '-' from `args_it`. Also skip all arguments listed in +// `skip_this_plus_parameter` plus their respective next argument. +// Keep all arguments once a '--' is encountered. +// (Note that some an argument work with and without '=', e.g. '--foo' 'bar' and '--foo=bar') +fn skip_uninteresting_args<'a, 'b, ArgsI, SkipI>( + mut args_it: ArgsI, + skip_this_plus_parameter: SkipI, +) -> Vec<&'a str> +where + ArgsI: Iterator, + SkipI: Iterator, +{ + let arg_follows_space: HashSet<&'b str> = skip_this_plus_parameter.into_iter().collect(); + + let mut result = Vec::new(); + loop { + match args_it.next() { + None => break result, + Some("--") => { + result.extend(args_it); + break result; + } + Some(arg) if arg_follows_space.contains(arg) => { + let _skip_parameter = args_it.next(); + } + Some(arg) if !arg.starts_with('-') => { + result.push(arg); + } + Some(_) => { /* skip: --these -and --also=this */ } + } + } +} + +fn guess_git_blame_filename_extension(args: &[String]) -> GitBlameExtension { + { + let mut it = args.iter(); + match (it.next(), it.next()) { + // git blame or git -C/-c etc. and then (maybe) blame + (Some(git), Some(blame)) + if git.contains("git") && (blame == "blame" || blame.starts_with('-')) => {} + _ => return GitBlameExtension::NotGitBlame, + } + } + + let args = args.iter().skip(2).map(|s| s.as_str()); + + // See git(1) and git-blame(1). Some arguments separate their parameter with space or '=', e.g. + // --date=2015 or --date 2015. + let git_blame_options_with_parameter = + "-C -c -L --since --ignore-rev --ignore-revs-file --contents --reverse --date"; + + match skip_uninteresting_args(args, git_blame_options_with_parameter.split(' ')) + .last() + .and_then(|&s| s.split('.').last()) + .map(str::to_owned) + { + Some(ext) => GitBlameExtension::Some(ext), + None => GitBlameExtension::None, + } +} + +fn parent_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<&Process> { + info.refresh_process(my_pid).then(|| ())?; + + let parent_pid = info.process(my_pid)?.parent()?; + info.refresh_process(parent_pid).then(|| ())?; + info.process(parent_pid) +} + +fn naive_sibling_process(info: &mut sysinfo::System, my_pid: Pid) -> Option<&Process> { + let sibling_pid = my_pid - 1; + info.refresh_process(sibling_pid).then(|| ())?; + info.process(sibling_pid) +} + +fn iter_parents(info: &sysinfo::System, pid: Pid, distance: usize, mut f: F) +where + F: FnMut(Pid, usize), +{ + if let Some(proc) = info.process(pid) { + if let Some(pid) = proc.parent() { + f(pid, distance); + iter_parents(info, pid, distance + 1, f) + } + } +} + +fn find_sibling_process(info: &mut sysinfo::System, my_pid: Pid) -> Option { + info.refresh_processes(); + + let this_start_time = info.process(my_pid)?.start_time(); + + /* + + $ start_blame_of.sh src/main.rs | delta + + \_ /usr/bin/some-terminal-emulator + | \_ common_git_and_delta_ancestor + | \_ /bin/sh /opt/git/start_blame_of.sh src/main.rs + | | \_ /bin/sh /opt/some/wrapper git blame src/main.rs + | | \_ /usr/bin/git blame src/main.rs + | \_ /bin/sh /opt/some/wrapper delta + | \_ delta + + Walk up the process tree of delta and of every matching other process, counting the steps + along the way. + Find the common ancestor processes, calculate the distance, and select the one with the shortest. + + */ + + let mut pid_distances = HashMap::::new(); + let mut collect_parent_pids = |pid: Pid, distance| { + pid_distances.insert(pid, distance); + }; + + iter_parents(info, my_pid, 1, &mut collect_parent_pids); + + let process_start_time_difference_less_than_3s = |a, b| (a as i64 - b as i64).abs() < 3; + + let closest_git_blame_extension = info + .processes() + .iter() + .filter(|(_, proc)| { + process_start_time_difference_less_than_3s(this_start_time, proc.start_time()) + }) + .filter_map( + |(pid, proc)| match guess_git_blame_filename_extension(proc.cmd()) { + GitBlameExtension::Some(args) => { + let mut length_of_process_chain = usize::MAX; + + let mut sum_distance = |pid: Pid, distance: usize| { + if length_of_process_chain == usize::MAX { + if let Some(distance_to_first_common_parent) = pid_distances.get(&pid) { + length_of_process_chain = + distance_to_first_common_parent + distance; + } + } + }; + iter_parents(info, *pid, 1, &mut sum_distance); + + Some((length_of_process_chain, args)) + } + _ => None, + }, + ) + .min_by_key(|(distance, _)| *distance) + .map(|(_, ext)| ext); + + closest_git_blame_extension +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_guess_filename_extension_from_args() { + use GitBlameExtension::None; + use GitBlameExtension::Some; + + fn make_string_vec(args: &[&str]) -> Vec { + args.iter().map(|&x| x.to_owned()).collect::>() + } + let args = make_string_vec(&["git", "blame", "hello", "world.txt"]); + assert_eq!( + guess_git_blame_filename_extension(&args), + Some("txt".into()) + ); + + let args = make_string_vec(&[ + "git", + "blame", + "-s", + "-f", + "hello.txt", + "--date=2015", + "--date", + "now", + ]); + assert_eq!( + guess_git_blame_filename_extension(&args), + Some("txt".into()) + ); + + let args = make_string_vec(&["git", "blame", "-s", "-f", "--", "hello.txt"]); + assert_eq!( + guess_git_blame_filename_extension(&args), + Some("txt".into()) + ); + + let args = make_string_vec(&["git", "blame", "--", "--not.an.argument"]); + assert_eq!( + guess_git_blame_filename_extension(&args), + Some("argument".into()) + ); + + let args = make_string_vec(&["foo", "bar", "-a", "--123", "not.git"]); + assert_eq!( + guess_git_blame_filename_extension(&args), + GitBlameExtension::NotGitBlame + ); + + let args = make_string_vec(&["git", "blame", "--help.txt"]); + assert_eq!(guess_git_blame_filename_extension(&args), None); + + let args = make_string_vec(&["git", "-c", "a=b", "blame", "main.rs"]); + assert_eq!(guess_git_blame_filename_extension(&args), Some("rs".into())); + + let args = make_string_vec(&["git", "blame", "README"]); + assert_eq!( + guess_git_blame_filename_extension(&args), + Some("README".into()) + ); + + let args = make_string_vec(&["git", "blame", ""]); + assert_eq!(guess_git_blame_filename_extension(&args), Some("".into())); + } + + #[test] + fn test_process_parent_cmd_args() { + // Github runs CI tests for arm under qemu where where sysinfo can not find the parent pid. + if std::env::vars().any(|(key, _)| key == "CROSS_RUNNER" || key == "QEMU_LD_PREFIX") { + return; + } + + let mut info = sysinfo::System::new(); + let my_pid = std::process::id() as Pid; + + let parent = parent_process(&mut info, my_pid); + + assert!(parent.is_some()); + + // Tests that caller is something like "cargo test" + assert!(parent.unwrap().cmd().iter().any(|a| a == "test")); + } +} -- cgit v1.2.3