use std::borrow::Cow;

use lazy_static::lazy_static;
use regex::Regex;
use serde::Deserialize;

use crate::ansi;
use crate::config::{
    delta_unreachable, GrepType, HunkHeaderIncludeFilePath, HunkHeaderIncludeLineNumber,
};
use crate::delta::{State, StateMachine};
use crate::handlers::{self, ripgrep_json};
use crate::paint::{self, BgShouldFill, StyleSectionSpecifier};
use crate::style::Style;
use crate::utils::{process, tabs};

use super::hunk_header::HunkHeaderIncludeHunkLabel;

// One parsed line of grep output.
//
// `path` and `code` are `Cow`s so that a parser can borrow directly from the
// input line when no rewriting is needed. `line_number` is `None` when the
// line carried no line number. `submatches`, when present, is a list of
// `(start, end)` byte ranges into `code` that should be highlighted as
// matches (see `make_style_sections`, which slices `code` with them).
#[derive(Debug, PartialEq, Eq)]
pub struct GrepLine<'b> {
    pub grep_type: GrepType,
    pub path: Cow<'b, str>,
    pub line_number: Option<usize>,
    pub line_type: LineType,
    pub code: Cow<'b, str>,
    pub submatches: Option<Vec<(usize, usize)>>,
}

impl<'b> GrepLine<'b> {
    // Expand tabs in `code` and move the submatch ranges to follow the text.
    //
    // Every range is shifted by the total number of bytes the line grew by.
    // NOTE(review): that is exact only if all expanded tabs precede the first
    // submatch (e.g. leading indentation); confirm whether tabs located
    // between or after submatches need per-range handling.
    fn expand_tabs(&mut self, tab_cfg: &tabs::TabCfg) {
        let old_len = self.code.len();
        self.code = tabs::expand(&self.code, tab_cfg).into();
        let shift = self.code.len().saturating_sub(old_len);
        self.submatches = self.submatches.as_ref().map(|submatches| {
            submatches
                .iter()
                .map(|(a, b)| (a + shift, b + shift))
                .collect()
        });
    }
}

// The role a line plays in grep output. With serde, each variant is
// (de)serialized under its lowercase name.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum LineType {
    ContextHeader,
    Context,
    FileHeader,
    Match,
    Ignore,
}

// Rendering switches for classic-format output; computed once from the
// calling process by `make_output_config`.
struct GrepOutputConfig {
    // Prefix lines with a navigate marker column when navigate is enabled.
    add_navigate_marker_to_matches: bool,
    // Render `LineType::ContextHeader` lines with the hunk-header writer.
    render_context_header_as_hunk_header: bool,
    // Passed through to `paint::paint_file_path_with_line_number`.
    pad_line_number: bool,
}

lazy_static! {
    static ref OUTPUT_CONFIG: GrepOutputConfig = make_output_config();
}

impl LineType {
    // The character that separates path / line number / code for this kind
    // of line. Only string literals are returned, hence `&'static str`.
    fn file_path_separator(&self) -> &'static str {
        // grep, rg, and git grep use ":" for matching lines
        // and "-" for non-matching lines (and `git grep -W`
        // uses "=" for a context header line).
        match self {
            LineType::Match => ":",
            LineType::Context => "-",
            LineType::ContextHeader => "=",
            LineType::Ignore | LineType::FileHeader => "",
        }
    }
}

impl<'a> StateMachine<'a> {
    // If this is a line of git grep output then render it accordingly.
pub fn handle_grep_line(&mut self) -> std::io::Result { self.painter.emit()?; let (previous_path, previous_line_type, previous_line, try_parse) = match &self.state { State::Grep(_, line_type, path, line_number) => { (Some(path.clone()), Some(line_type), line_number, true) } State::Unknown => (None, None, &None, true), _ => (None, None, &None, false), }; let mut handled_line = false; if try_parse { let line = self.line.clone(); // TODO: avoid clone if let Some(grep_line) = parse_grep_line(&line) { if matches!(grep_line.line_type, LineType::Ignore) { handled_line = true; return Ok(handled_line); } let first_path = previous_path.is_none(); let new_path = first_path || previous_path.as_deref() != Some(&grep_line.path); // Emit a '--' section separator when output contains context lines (i.e. *grep option -A, -B, -C is in effect). let new_section = !new_path && (previous_line_type == Some(&LineType::Context) || grep_line.line_type == LineType::Context) && previous_line < &grep_line.line_number.as_ref().map(|n| n - 1); self.state = State::Grep( self.config .grep_output_type .clone() .unwrap_or_else(|| grep_line.grep_type.clone()), grep_line.line_type, grep_line.path.to_string(), grep_line.line_number, ); if new_path { if let Some(lang) = handlers::diff_header::get_extension(&grep_line.path) .or(self.config.default_language.as_deref()) { self.painter.set_syntax(Some(lang)); self.painter.set_highlighter(); } } match &self.state { State::Grep(GrepType::Ripgrep, _, _, _) => self.emit_ripgrep_format_grep_line( grep_line, new_path, first_path, new_section, ), State::Grep(GrepType::Classic, _, _, _) => { self.emit_classic_format_grep_line(grep_line) } _ => delta_unreachable("Impossible state while handling grep line."), }?; handled_line = true } } Ok(handled_line) } // Emulate ripgrep output: each section of hits from the same path has a header line, // and sections are separated by a blank line. Set language whenever path changes. 
// Arguments:
// - `new_path`: this line belongs to a different file than the previous one,
//   so a path header is written first.
// - `first_path`: no grep line has been handled before; suppresses the blank
//   line that otherwise precedes a path header.
// - `new_section`: write a "--" separator line before the hit.
fn emit_ripgrep_format_grep_line(
    &mut self,
    mut grep_line: GrepLine,
    new_path: bool,
    first_path: bool,
    new_section: bool,
) -> std::io::Result<()> {
    if new_path {
        // Emit new path header line
        if !first_path {
            writeln!(self.painter.writer)?;
        }
        // The header carries the file path only: no code and no line number.
        handlers::hunk_header::write_line_of_code_with_optional_path_and_line_number(
            "",
            &[(0, 0)],
            None,
            &mut self.painter,
            &self.line,
            &grep_line.path,
            self.config.ripgrep_header_style.decoration_style,
            &self.config.grep_file_style,
            &self.config.grep_line_number_style,
            &HunkHeaderIncludeFilePath::Yes,
            &HunkHeaderIncludeLineNumber::No,
            &HunkHeaderIncludeHunkLabel::Yes,
            "",
            self.config,
        )?
    }
    if new_section {
        writeln!(self.painter.writer, "--")?;
    }
    // Emit the actual grep hit line
    let code_style_sections = match (&grep_line.line_type, &grep_line.submatches) {
        (LineType::Match, Some(_)) => {
            // We expand tabs at this late stage because
            // the tabs are escaped in the JSON, so
            // expansion must come after JSON parsing.
            // (At the time of writing, we are in this
            // arm iff we are handling `ripgrep --json`
            // output.)
            grep_line.expand_tabs(&self.config.tab_cfg);
            make_style_sections(
                &grep_line.code,
                &grep_line.submatches.unwrap(),
                self.config.grep_match_word_style,
                self.config.grep_match_line_style,
            )
        }
        (LineType::Match, None) => {
            // HACK: We need tabs expanded, and we need
            // the &str passed to
            // `get_code_style_sections` to live long
            // enough. But at this point it is guaranteed
            // that this handler is going to handle this
            // line, so mutating it is acceptable.
            self.raw_line = tabs::expand(&self.raw_line, &self.config.tab_cfg);
            // Fall back to styling the whole line as a match line when no
            // sections could be derived from the raw line.
            get_code_style_sections(
                &self.raw_line,
                self.config.grep_match_word_style,
                self.config.grep_match_line_style,
                &grep_line.path,
                grep_line.line_number,
            )
            .unwrap_or(StyleSectionSpecifier::Style(
                self.config.grep_match_line_style,
            ))
        }
        // Every non-match line is painted with the context-line style.
        _ => StyleSectionSpecifier::Style(self.config.grep_context_line_style),
    };
    // The hit line itself: no path (the header already showed it), and a line
    // number only when one was parsed.
    handlers::hunk_header::write_line_of_code_with_optional_path_and_line_number(
        &grep_line.code,
        &[(grep_line.line_number.unwrap_or(0), 0)],
        Some(code_style_sections),
        &mut self.painter,
        &self.line,
        &grep_line.path,
        crate::style::DecorationStyle::NoDecoration,
        &self.config.grep_file_style,
        &self.config.grep_line_number_style,
        &HunkHeaderIncludeFilePath::No,
        if grep_line.line_number.is_some() {
            &HunkHeaderIncludeLineNumber::Yes
        } else {
            &HunkHeaderIncludeLineNumber::No
        },
        &HunkHeaderIncludeHunkLabel::No,
        grep_line.line_type.file_path_separator(),
        self.config,
    )
}

// Render one line in classic grep format: `path<sep>line_number<sep>code`.
// A context header line is rendered through the hunk-header writer instead
// when `OUTPUT_CONFIG.render_context_header_as_hunk_header` is set.
fn emit_classic_format_grep_line(&mut self, grep_line: GrepLine) -> std::io::Result<()> {
    match (
        &grep_line.line_type,
        OUTPUT_CONFIG.render_context_header_as_hunk_header,
    ) {
        // Emit context header line (`git grep -W`)
        (LineType::ContextHeader, true) => {
            handlers::hunk_header::write_line_of_code_with_optional_path_and_line_number(
                &grep_line.code,
                &[(grep_line.line_number.unwrap_or(0), 0)],
                None,
                &mut self.painter,
                &self.line,
                &grep_line.path,
                self.config.classic_grep_header_style.decoration_style,
                &self.config.classic_grep_header_file_style,
                &self.config.grep_line_number_style,
                &self.config.hunk_header_style_include_file_path,
                &self.config.hunk_header_style_include_line_number,
                &HunkHeaderIncludeHunkLabel::Yes,
                grep_line.line_type.file_path_separator(),
                self.config,
            )?
        }
        _ => {
            if self.config.navigate {
                // With navigate markers enabled, match lines get a bullet and
                // all other lines get a blank placeholder; otherwise nothing
                // is written.
                write!(
                    self.painter.writer,
                    "{}",
                    match (
                        &grep_line.line_type,
                        OUTPUT_CONFIG.add_navigate_marker_to_matches
                    ) {
                        (LineType::Match, true) => "• ",
                        (_, true) => " ",
                        _ => "",
                    }
                )?
            }
            self._emit_classic_format_file_and_line_number(&grep_line)?;
            self._emit_classic_format_code(grep_line)?;
        }
    }
    Ok(())
}

// Write the painted `path<sep>line_number<sep>` prefix of a classic-format line.
fn _emit_classic_format_file_and_line_number(
    &mut self,
    grep_line: &GrepLine,
) -> std::io::Result<()> {
    // The special value "keep" preserves the separator that the grep tool
    // itself used for this kind of line.
    let separator = if self.config.grep_separator_symbol == "keep" {
        grep_line.line_type.file_path_separator()
    } else {
        // Otherwise use the configured symbol for every line type.
        // ":" results in a "file/path:number:"
        // construct that terminal emulators are more likely
        // to recognize and render as a clickable link. If
        // navigate is enabled then there is already a good
        // visual indicator of match lines (in addition to
        // the grep-match-style highlighting) and so we use
        // ":" for matches and non-matches alike.
        &self.config.grep_separator_symbol
    };
    write!(
        self.painter.writer,
        "{}",
        paint::paint_file_path_with_line_number(
            grep_line.line_number,
            &grep_line.path,
            OUTPUT_CONFIG.pad_line_number,
            separator,
            true,
            Some(self.config.grep_file_style),
            Some(self.config.grep_line_number_style),
            self.config
        )
    )?;
    Ok(())
}

// Syntax-highlight and paint the code part of a classic-format line, with
// match highlighting layered on top for match lines.
fn _emit_classic_format_code(&mut self, mut grep_line: GrepLine) -> std::io::Result<()> {
    let code_style_sections = match (&grep_line.line_type, &grep_line.submatches) {
        (LineType::Match, Some(_)) => {
            // We expand tabs at this late stage because
            // the tabs are escaped in the JSON, so
            // expansion must come after JSON parsing.
            // (At the time of writing, we are in this
            // arm iff we are handling `ripgrep --json`
            // output.)
            grep_line.expand_tabs(&self.config.tab_cfg);
            make_style_sections(
                &grep_line.code,
                &grep_line.submatches.unwrap(),
                self.config.grep_match_word_style,
                self.config.grep_match_line_style,
            )
        }
        (LineType::Match, None) => {
            // HACK: We need tabs expanded, and we need
            // the &str passed to
            // `get_code_style_sections` to live long
            // enough. But at this point it is guaranteed
            // that this handler is going to handle this
            // line, so mutating it is acceptable.
            self.raw_line = tabs::expand(&self.raw_line, &self.config.tab_cfg);
            get_code_style_sections(
                &self.raw_line,
                self.config.grep_match_word_style,
                self.config.grep_match_line_style,
                &grep_line.path,
                grep_line.line_number,
            )
            .unwrap_or(StyleSectionSpecifier::Style(
                self.config.grep_match_line_style,
            ))
        }
        // Every non-match line is painted with the context-line style.
        _ => StyleSectionSpecifier::Style(self.config.grep_context_line_style),
    };
    // The painter is handed the code with a trailing newline appended.
    self.painter.syntax_highlight_and_paint_line(
        &format!("{}\n", grep_line.code),
        code_style_sections,
        self.state.clone(),
        BgShouldFill::default(),
    );
    Ok(())
}
}

// Split `line` into styled sections: each `(start, end)` byte range in
// `submatches` gets `match_style`, and the text before, between and after
// the ranges gets `non_match_style`.
//
// The ranges are consumed in the order given and used to slice `line`
// directly, so they must be ascending, non-overlapping, in bounds and on
// char boundaries; otherwise the slicing panics.
fn make_style_sections<'a>(
    line: &'a str,
    submatches: &[(usize, usize)],
    match_style: Style,
    non_match_style: Style,
) -> StyleSectionSpecifier<'a> {
    let mut sections = Vec::new();
    // Byte offset up to which `line` has already been emitted.
    let mut curr = 0;
    for (start_, end_) in submatches {
        let (start, end) = (*start_, *end_);
        if start > curr {
            sections.push((non_match_style, &line[curr..start]))
        };
        sections.push((match_style, &line[start..end]));
        curr = end;
    }
    // Trailing text after the last submatch.
    if curr < line.len() {
        sections.push((non_match_style, &line[curr..]))
    }
    StyleSectionSpecifier::StyleSections(sections)
}

// Return style sections describing colors received from git.
fn get_code_style_sections<'b>( raw_line: &'b str, match_style: Style, non_match_style: Style, path: &str, line_number: Option, ) -> Option> { if let Some(prefix_end) = ansi::ansi_preserving_index( raw_line, match line_number { Some(n) => format!("{}:{}:", path, n).len() - 1, None => path.len(), }, ) { let match_style_sections = ansi::parse_style_sections(&raw_line[(prefix_end + 1)..]) .iter() .map(|(ansi_term_style, s)| { if ansi_term_style.is_bold && ansi_term_style.foreground == Some(ansi_term::Colour::Red) { (match_style, *s) } else { (non_match_style, *s) } }) .collect(); Some(StyleSectionSpecifier::StyleSections(match_style_sections)) } else { None } } fn make_output_config() -> GrepOutputConfig { match &*process::calling_process() { process::CallingProcess::GitGrep(command_line) if command_line.short_options.contains("-W") || command_line.long_options.contains("--function-context") => { // --function-context is in effect: i.e. the entire function is // being displayed. In that case we don't render the first line as a // header, since the second line is the true next line, and it will // be more readable to have these displayed normally. We do add the // navigate marker, since match lines will be surrounded by (many) // non-match lines. And, since we are printing (many) successive lines // of code, we pad line numbers <100 in order to maintain code // alignment up to line 9999. GrepOutputConfig { render_context_header_as_hunk_header: false, add_navigate_marker_to_matches: true, pad_line_number: true, } } process::CallingProcess::GitGrep(command_line) if command_line.short_options.contains("-p") || command_line.long_options.contains("--show-function") => { // --show-function is in effect, i.e. the function header is being // displayed, along with matches within the function. Therefore we // render the first line as a header, but we do not add the navigate // marker, since all non-header lines are matches. 
GrepOutputConfig { render_context_header_as_hunk_header: true, add_navigate_marker_to_matches: false, pad_line_number: true, } } _ => GrepOutputConfig { render_context_header_as_hunk_header: true, add_navigate_marker_to_matches: false, pad_line_number: true, }, } } enum GrepLineRegex { WithFileExtensionAndLineNumber, WithFileExtension, WithFileExtensionNoSpaces, WithoutSeparatorCharacters, } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER: Regex = make_grep_line_regex(GrepLineRegex::WithFileExtensionAndLineNumber); } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES: Regex = make_grep_line_regex(GrepLineRegex::WithFileExtensionNoSpaces); } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex = make_grep_line_regex(GrepLineRegex::WithFileExtension); } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex = make_grep_line_regex(GrepLineRegex::WithoutSeparatorCharacters); } // See tests for example grep lines fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { // Grep tools such as `git grep` and `rg` emit lines like the following, // where "xxx" represents arbitrary code. Note that there are 3 possible // "separator characters": ':', '-', '='. // The format is ambiguous, but we attempt to parse it. // src/co-7-fig.rs:xxx // src/co-7-fig.rs:7:xxx // src/co-7-fig.rs-xxx // src/co-7-fig.rs-7-xxx // src/co-7-fig.rs=xxx // src/co-7-fig.rs=7=xxx // Makefile:xxx // Makefile:7:xxx // Makefile-xxx // Makefile-7-xxx // Make-7-file:xxx // Make-7-file:7:xxx // Make-7-file-xxx // Make-7-file-7-xxx let file_path = match regex_variant { GrepLineRegex::WithFileExtensionAndLineNumber | GrepLineRegex::WithFileExtension => { r" ( # 1. file name (colons not allowed) [^:|\ ] # try to be strict about what a file path can start with [^:]* # anything [^\ ]\.[^.\ :=-]{1,10} # extension ) " } GrepLineRegex::WithFileExtensionNoSpaces => { r" ( # 1. 
file name (colons not allowed) [^:|\ ]+ # try to be strict about what a file path can start with [^\ ]\.[^.\ :=-]{1,6} # extension ) " } GrepLineRegex::WithoutSeparatorCharacters => { r" ( # 1. file name (colons not allowed) [^:|\ =-] # try to be strict about what a file path can start with [^:=-]* # anything except separators [^:\ ] # a file name cannot end with whitespace ) " } }; let separator = match regex_variant { GrepLineRegex::WithFileExtensionAndLineNumber => { r#" (?: ( : # 2. match marker ([0-9]+): # 3. line number followed by second match marker ) | ( - # 4. nomatch marker ([0-9]+)- # 5. line number followed by second nomatch marker ) | ( = # 6. context header marker ([0-9]+)= # 7. line number followed by second header marker ) ) "# } _ => { r#" (?: ( : # 2. match marker (?:([0-9]+):)? # 3. optional: line number followed by second match marker ) | ( - # 4. nomatch marker (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker ) | ( = # 6. context header marker (?:([0-9]+)=)? # 7. optional: line number followed by second header marker ) ) "# } }; Regex::new(&format!( "(?x) ^ {file_path} {separator} (.*) # 8. code (i.e. 
line contents) $ ", )) .unwrap() } pub fn parse_grep_line(line: &str) -> Option { if line.starts_with('{') { ripgrep_json::parse_line(line) } else { match &*process::calling_process() { process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep => [ &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER, &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES, &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION, &*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS, ] .iter() .find_map(|regex| _parse_grep_line(regex, line)), _ => None, } } } pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option> { let caps = regex.captures(line)?; let file = caps.get(1).unwrap().as_str().into(); let (line_type, line_number) = &[ (2, LineType::Match), (4, LineType::Context), (6, LineType::ContextHeader), ] .iter() .find_map(|(i, line_type)| { if caps.get(*i).is_some() { let line_number: Option = caps.get(i + 1).and_then(|m| m.as_str().parse().ok()); Some((*line_type, line_number)) } else { None } }) .unwrap(); // The regex matches so one of the three alternatives must have matched let code = caps.get(8).unwrap().as_str().into(); Some(GrepLine { grep_type: GrepType::Classic, path: file, line_number: *line_number, line_type: *line_type, code, submatches: None, }) } #[cfg(test)] mod tests { use crate::handlers::grep::{parse_grep_line, GrepLine, GrepType, LineType}; use crate::utils::process::tests::FakeParentArgs; #[test] fn test_parse_grep_match() { let fake_parent_grep_command = "git --doesnt-matter grep --nor-this nor_this -- nor_this"; let _args = FakeParentArgs::for_scope(fake_parent_grep_command); assert_eq!( parse_grep_line("src/co-7-fig.rs:xxx"), Some(GrepLine { grep_type: GrepType::Classic, path: "src/co-7-fig.rs".into(), line_number: None, line_type: LineType::Match, code: "xxx".into(), submatches: None, }) ); assert_eq!( parse_grep_line("src/config.rs:use crate::minusplus::MinusPlus;"), Some(GrepLine { grep_type: GrepType::Classic, path: 
"src/config.rs".into(), line_number: None, line_type: LineType::Match, code: "use crate::minusplus::MinusPlus;".into(), submatches: None, }) ); assert_eq!( parse_grep_line( "src/config.rs: pub line_numbers_style_minusplus: MinusPlus