diff options
author | Josh Triplett <josh@joshtriplett.org> | 2024-03-02 05:41:56 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-02 08:41:56 -0500 |
commit | f7ae06a5f1e6943d4a78c8ede313ecd89c8563db (patch) | |
tree | c4f29e4ffaca62c65015acfab6f4a657b32e03cd | |
parent | 037665ba37593665e5566fee8ac580ef6bbb170a (diff) |
Parse filename unambiguously using color escape sequences (#1634)
* Simplify handle_grep_line via early return
Rather than indenting the entire body of the function twice, return
early if the conditions aren't met.
* Parse filename unambiguously using color escape sequences
`git grep`, by default, emits ANSI color escape sequences around the
filename, separator, and line number. Parse these if available.
This currently assumes the default colors, and will fall back to the
previous parsing if any of the colors have been changed.
Add tests for filenames that previously failed to parse correctly.
-rw-r--r-- | src/handlers/grep.rs | 219 |
1 files changed, 170 insertions, 49 deletions
diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs index 5aee6dc7..4cc93ab2 100644 --- a/src/handlers/grep.rs +++ b/src/handlers/grep.rs @@ -91,58 +91,64 @@ impl<'a> StateMachine<'a> { State::Unknown => (None, None, &None, true), _ => (None, None, &None, false), }; + if !try_parse { + return Ok(false); + } - let mut handled_line = false; - if try_parse { - let line = self.line.clone(); // TODO: avoid clone + // Try parse_raw_grep_line on raw_line, and fall back to parse_grep_line + let raw_line = self.raw_line.clone(); // TODO: avoid clone + let line; + let grep_line = if let Some(grep_line) = parse_raw_grep_line(&raw_line) { + grep_line + } else { + line = self.line.clone(); // TODO: avoid clone if let Some(grep_line) = parse_grep_line(&line) { - if matches!(grep_line.line_type, LineType::Ignore) { - handled_line = true; - return Ok(handled_line); - } - let first_path = previous_path.is_none(); - let new_path = first_path || previous_path.as_deref() != Some(&grep_line.path); - // Emit a '--' section separator when output contains context lines (i.e. *grep option -A, -B, -C is in effect). - let new_section = !new_path - && (previous_line_type == Some(&LineType::Context) - || grep_line.line_type == LineType::Context) - && previous_line < &grep_line.line_number.as_ref().map(|n| n - 1); - self.state = State::Grep( - self.config - .grep_output_type - .clone() - .unwrap_or_else(|| grep_line.grep_type.clone()), - grep_line.line_type, - grep_line.path.to_string(), - grep_line.line_number, - ); - if new_section { - self.painter.set_highlighter() - } - if new_path { - if let Some(lang) = handlers::diff_header::get_extension(&grep_line.path) - .or(self.config.default_language.as_deref()) - { - self.painter.set_syntax(Some(lang)); - self.painter.set_highlighter(); - } - } - match &self.state { - State::Grep(GrepType::Ripgrep, _, _, _) => self.emit_ripgrep_format_grep_line( - grep_line, - new_path, - first_path, - new_section, - ), - State::Grep(GrepType::Classic, _, _, _) => { - self.emit_classic_format_grep_line(grep_line) - } - _ => delta_unreachable("Impossible state while handling grep line."), - }?; - handled_line = true + grep_line + } else { + return Ok(false); } + }; + + if matches!(grep_line.line_type, LineType::Ignore) { + return Ok(true); } - Ok(handled_line) + let first_path = previous_path.is_none(); + let new_path = first_path || previous_path.as_deref() != Some(&grep_line.path); + // Emit a '--' section separator when output contains context lines (i.e. *grep option -A, -B, -C is in effect). + let new_section = !new_path + && (previous_line_type == Some(&LineType::Context) + || grep_line.line_type == LineType::Context) + && previous_line < &grep_line.line_number.as_ref().map(|n| n - 1); + self.state = State::Grep( + self.config + .grep_output_type + .clone() + .unwrap_or_else(|| grep_line.grep_type.clone()), + grep_line.line_type, + grep_line.path.to_string(), + grep_line.line_number, + ); + if new_section { + self.painter.set_highlighter() + } + if new_path { + if let Some(lang) = handlers::diff_header::get_extension(&grep_line.path) + .or(self.config.default_language.as_deref()) + { + self.painter.set_syntax(Some(lang)); + self.painter.set_highlighter(); + } + } + match &self.state { + State::Grep(GrepType::Ripgrep, _, _, _) => { + self.emit_ripgrep_format_grep_line(grep_line, new_path, first_path, new_section) + } + State::Grep(GrepType::Classic, _, _, _) => { + self.emit_classic_format_grep_line(grep_line) + } + _ => delta_unreachable("Impossible state while handling grep line."), + }?; + Ok(true) } // Emulate ripgrep output: each section of hits from the same path has a header line, @@ -467,6 +473,7 @@ fn make_output_config() -> GrepOutputConfig { } enum GrepLineRegex { + WithColor, WithFileExtensionAndLineNumber, WithFileExtension, WithFileExtensionNoSpaces, @@ -474,6 +481,11 @@ enum GrepLineRegex { } lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_COLOR: Regex = + make_grep_line_regex(GrepLineRegex::WithColor); +} + +lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER: Regex = make_grep_line_regex(GrepLineRegex::WithFileExtensionAndLineNumber); } @@ -519,6 +531,15 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { // Make-7-file-7-xxx let file_path = match regex_variant { + GrepLineRegex::WithColor => { + r" + \x1b\[35m # starts with ANSI color sequence + ( # 1. file name + [^\x1b]* # anything + ) + \x1b\[m # ends with ANSI color reset + " + } GrepLineRegex::WithFileExtensionAndLineNumber | GrepLineRegex::WithFileExtension => { r" ( # 1. file name (colons not allowed) @@ -548,6 +569,51 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { }; let separator = match regex_variant { + GrepLineRegex::WithColor => { + r#" + \x1b\[36m # starts with ANSI color sequence for separator + (?: + ( + : # 2. match marker + \x1b\[m # ANSI color reset + (?: # optional: line number followed by second match marker + \x1b\[32m # ANSI color sequence for line number + ([0-9]+) # 3. line number + \x1b\[m # ANSI color reset + \x1b\[36m # ANSI color sequence for separator + : # second match marker + \x1b\[m # ANSI color reset + )? + ) + | + ( + - # 4. nomatch marker + \x1b\[m # ANSI color reset + (?: # optional: line number followed by second nomatch marker + \x1b\[32m # ANSI color sequence for line number + ([0-9]+) # 5. line number + \x1b\[m # ANSI color reset + \x1b\[36m # ANSI color sequence for separator + - # second nomatch marker + \x1b\[m # ANSI color reset + )? + ) + | + ( + = # 6. context header marker + \x1b\[m # ANSI color reset + (?: # optional: line number followed by second context header marker + \x1b\[32m # ANSI color sequence for line number + ([0-9]+) # 7. line number + \x1b\[m # ANSI color reset + \x1b\[36m # ANSI color sequence for separator + = # second context header marker + \x1b\[m # ANSI color reset + )? + ) + ) + "# + } GrepLineRegex::WithFileExtensionAndLineNumber => { r#" (?: @@ -620,6 +686,23 @@ pub fn parse_grep_line(line: &str) -> Option<GrepLine> { } } +pub fn parse_raw_grep_line(raw_line: &str) -> Option<GrepLine> { + // Early exit if we don't have an escape sequence + if !raw_line.starts_with("\x1b") { + return None; + } + if !matches!( + &*process::calling_process(), + process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep + ) { + return None; + } + _parse_grep_line(&GREP_LINE_REGEX_ASSUMING_COLOR, raw_line).map(|mut grep_line| { + grep_line.code = ansi::strip_ansi_codes(&grep_line.code).into(); + grep_line + }) +} + pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b>> { let caps = regex.captures(line)?; let file = caps.get(1).unwrap().as_str().into(); @@ -652,7 +735,9 @@ pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b> #[cfg(test)] mod tests { - use crate::handlers::grep::{parse_grep_line, GrepLine, GrepType, LineType}; + use crate::handlers::grep::{ + parse_grep_line, parse_raw_grep_line, GrepLine, GrepType, LineType, + }; use crate::utils::process::tests::FakeParentArgs; #[test] @@ -866,6 +951,42 @@ mod tests { } #[test] + fn test_parse_grep_color() { + let fake_parent_grep_command = + "/usr/local/bin/git --doesnt-matter grep --nor-this nor_this -- nor_this"; + let _args = FakeParentArgs::for_scope(fake_parent_grep_command); + + let e = "\x1B"; + assert_eq!( + parse_raw_grep_line(&format!( + r#"{e}[35msrc/zlib-ng/configure{e}[m{e}[36m:{e}[m -a*=* | -{e}[1;32m-arch{e}[ms=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"# + )), + Some(GrepLine { + grep_type: GrepType::Classic, + path: "src/zlib-ng/configure".into(), + line_number: None, + line_type: LineType::Match, + code: r#" -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#.into(), + submatches: None, + }) + ); + + assert_eq!( + parse_raw_grep_line(&format!( + r#"{e}[35msrc/zlib-ng/configure{e}[m{e}[36m:{e}[m{e}[32m214{e}[m{e}[36m:{e}[m -a*=* | -{e}[1;32m-arch{e}[ms=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"# + )), + Some(GrepLine { + grep_type: GrepType::Classic, + path: "src/zlib-ng/configure".into(), + line_number: Some(214), + line_type: LineType::Match, + code: r#" -a*=* | --archs=*) ARCHS=$(echo $1 | sed 's/.*=//'); shift ;;"#.into(), + submatches: None, + }) + ); + } + + #[test] fn test_parse_grep_no_match() { let fake_parent_grep_command = "/usr/local/bin/git --doesnt-matter grep --nor-this nor_this -- nor_this"; |