summaryrefslogtreecommitdiffstats
path: root/src/handlers/grep.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/handlers/grep.rs')
-rw-r--r--src/handlers/grep.rs711
1 files changed, 711 insertions, 0 deletions
diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs
new file mode 100644
index 00000000..044c7400
--- /dev/null
+++ b/src/handlers/grep.rs
@@ -0,0 +1,711 @@
+// TODO
+// Bad parsing: "etc/examples/119-within-line-edits:4:repo=$(mktemp -d)"
+// Parsing "Makefile"
+// Inspect process tree once
+use std::borrow::Cow;
+
+use lazy_static::lazy_static;
+use regex::Regex;
+use serde::Deserialize;
+use unicode_segmentation::UnicodeSegmentation;
+
+use crate::ansi;
+use crate::delta::{State, StateMachine};
+use crate::handlers::{self, ripgrep_json};
+use crate::paint::{self, BgShouldFill, StyleSectionSpecifier};
+use crate::style::Style;
+use crate::utils;
+
+/// A single parsed line of grep output, as produced by `parse_grep_line`.
+#[derive(Debug, PartialEq)]
+pub struct GrepLine<'b> {
+ /// Path of the file that the line belongs to.
+ pub path: Cow<'b, str>,
+ /// Line number within the file, when the grep output included one.
+ pub line_number: Option<usize>,
+ /// Whether this is a match line, a context line, a context header, or a
+ /// line to be ignored.
+ pub line_type: LineType,
+ /// The line contents, i.e. everything after the path / line-number prefix.
+ pub code: Cow<'b, str>,
+ /// `(start, end)` index pairs into `code` marking the matched text. The
+ /// regex-based parser in this file always sets this to `None`.
+ pub submatches: Option<Vec<(usize, usize)>>,
+}
+
+/// The kind of a grep output line. In plain-text grep output the kind is
+/// indicated by the separator character that follows the file path.
+#[derive(Clone, Copy, Debug, PartialEq, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum LineType {
+ /// A context header line (separator `=`, e.g. from `git grep -W`).
+ ContextHeader,
+ /// A non-matching context line (separator `-`).
+ Context,
+ /// A matching line (separator `:`).
+ Match,
+ /// A line that the handler consumes without producing any output.
+ Ignore,
+}
+
+/// Rendering choices for grep output, derived by `make_output_config`.
+struct GrepOutputConfig {
+ // When navigate is enabled: prefix match lines with a marker and other
+ // lines with padding.
+ add_navigate_marker_to_matches: bool,
+ // Render `LineType::ContextHeader` lines via the hunk-header writer
+ // instead of as ordinary path/line-number/code lines.
+ render_context_header_as_hunk_header: bool,
+ // Forwarded to `paint::paint_file_path_with_line_number` to request
+ // line-number padding.
+ pad_line_number: bool,
+}
+
+lazy_static! {
+ // Computed once, on first use.
+ static ref OUTPUT_CONFIG: GrepOutputConfig = make_output_config();
+}
+
+impl<'a> StateMachine<'a> {
+ /// If this is a line of grep output then render it accordingly and return
+ /// `Ok(true)`; otherwise return `Ok(false)`.
+ ///
+ /// Parsing is only attempted when the current state is `State::Grep` or
+ /// `State::Unknown`. Whenever a language can be determined (from the file
+ /// extension, falling back to the configured default language) the
+ /// syntax-highlighter language is (re)set; this currently happens on every
+ /// parsed line, not only the first one.
+ ///
+ /// Errors from writing to the painter's writer are propagated.
+ pub fn handle_grep_line(&mut self) -> std::io::Result<bool> {
+ self.painter.emit()?;
+ let mut handled_line = false;
+
+ // TODO: It should be possible to eliminate some of the .clone()s and
+ // .to_owned()s.
+ // `_previous_file` is currently unused; `repeat_grep_line` is carried
+ // over unchanged into the next `State::Grep`.
+ let (_previous_file, repeat_grep_line, try_parse) = match &self.state {
+ State::Grep(file, repeat_grep_line) => {
+ (Some(file.as_str()), repeat_grep_line.clone(), true)
+ }
+ State::Unknown => (None, None, true),
+ _ => (None, None, false),
+ };
+ if try_parse {
+ if let Some(mut grep_line) = parse_grep_line(&self.line) {
+ // Lines marked `Ignore` are consumed without any output.
+ if matches!(grep_line.line_type, LineType::Ignore) {
+ handled_line = true;
+ return Ok(handled_line);
+ }
+
+ // Emit syntax-highlighted code
+ // TODO: Determine the language less frequently, e.g. only when the file changes.
+ if let Some(lang) = handlers::file_meta::get_extension(&grep_line.path)
+ .or_else(|| self.config.default_language.as_deref())
+ {
+ self.painter.set_syntax(Some(lang));
+ self.painter.set_highlighter();
+ }
+ self.state = State::Grep(grep_line.path.to_string(), repeat_grep_line);
+
+ match (
+ &grep_line.line_type,
+ OUTPUT_CONFIG.render_context_header_as_hunk_header,
+ ) {
+ // Emit context header line
+ (LineType::ContextHeader, true) => handlers::hunk_header::write_hunk_header(
+ &grep_line.code,
+ &[(grep_line.line_number.unwrap_or(0), 0)],
+ &mut self.painter,
+ &self.line,
+ &grep_line.path,
+ self.config,
+ )?,
+ _ => {
+ // In navigate mode, optionally prefix the line with a
+ // marker (match lines) or padding (all other lines).
+ if self.config.navigate {
+ write!(
+ self.painter.writer,
+ "{}",
+ match (
+ &grep_line.line_type,
+ OUTPUT_CONFIG.add_navigate_marker_to_matches
+ ) {
+ (LineType::Match, true) => "• ",
+ (_, true) => " ",
+ _ => "",
+ }
+ )?
+ }
+ // Emit file & line-number
+ let separator = if self.config.grep_separator_symbol == "keep" {
+ // grep, rg, and git grep use ":" for matching lines
+ // and "-" for non-matching lines (and `git grep -W`
+ // uses "=" for a context header line).
+ match grep_line.line_type {
+ LineType::Match => ":",
+ LineType::Context => "-",
+ LineType::ContextHeader => "=",
+ LineType::Ignore => "",
+ }
+ } else {
+ // But ":" results in a "file/path:number:"
+ // construct that terminal emulators are more likely
+ // to recognize and render as a clickable link. If
+ // navigate is enabled then there is already a good
+ // visual indicator of match lines (in addition to
+ // the grep-match-style highlighting) and so we use
+ // ":" for matches and non-matches alike.
+ &self.config.grep_separator_symbol
+ };
+ write!(
+ self.painter.writer,
+ "{}",
+ paint::paint_file_path_with_line_number(
+ grep_line.line_number,
+ &grep_line.path,
+ OUTPUT_CONFIG.pad_line_number,
+ separator,
+ true,
+ Some(self.config.grep_file_style),
+ Some(self.config.grep_line_number_style),
+ self.config
+ )
+ )?;
+
+ // Emit code line
+ let code_style_sections =
+ match (&grep_line.line_type, &grep_line.submatches) {
+ (LineType::Match, Some(submatches)) => {
+ // We expand tabs at this late stage because
+ // the tabs are escaped in the JSON, so
+ // expansion must come after JSON parsing.
+ // (At the time of writing, we are in this
+ // arm iff we are handling `ripgrep --json`
+ // output.)
+ // NOTE(review): `submatches` is applied to the
+ // tab-expanded code; confirm the offsets still
+ // line up when the code contains tabs.
+ grep_line.code = self
+ .painter
+ .expand_tabs(grep_line.code.graphemes(true))
+ .into();
+ make_style_sections(
+ &grep_line.code,
+ submatches,
+ self.config.grep_match_word_style,
+ self.config.grep_match_line_style,
+ )
+ }
+ (LineType::Match, None) => {
+ // HACK: We need tabs expanded, and we need
+ // the &str passed to
+ // `get_code_style_sections` to live long
+ // enough. But at the point it is guaranteed
+ // that this handler is going to handle this
+ // line, so mutating it is acceptable.
+ self.raw_line =
+ self.painter.expand_tabs(self.raw_line.graphemes(true));
+ // Fall back to styling the whole line as a
+ // match line when no sections can be derived
+ // from the raw line.
+ get_code_style_sections(
+ &self.raw_line,
+ self.config.grep_match_word_style,
+ self.config.grep_match_line_style,
+ &grep_line,
+ )
+ .unwrap_or(
+ StyleSectionSpecifier::Style(
+ self.config.grep_match_line_style,
+ ),
+ )
+ }
+ // Context lines (and context headers not rendered
+ // as hunk headers) get the context-line style.
+ _ => StyleSectionSpecifier::Style(
+ self.config.grep_context_line_style,
+ ),
+ };
+ self.painter.syntax_highlight_and_paint_line(
+ &format!("{}\n", grep_line.code),
+ code_style_sections,
+ self.state.clone(),
+ BgShouldFill::default(),
+ )
+ }
+ }
+ handled_line = true
+ }
+ }
+ Ok(handled_line)
+ }
+}
+
+/// Split `line` into style sections: text inside a submatch gets
+/// `match_style`, and all other text gets `non_match_style`.
+///
+/// `submatches` holds `(start, end)` byte offsets into `line`, expected in
+/// ascending, non-overlapping order. Offsets are treated defensively so that
+/// unexpected input cannot panic or duplicate text:
+///
+/// - a range starting before the end of the previous one is clamped to it;
+/// - a range ending past the end of `line` is clamped to `line.len()`;
+/// - a range that is reversed after clamping, or that does not fall on UTF-8
+///   character boundaries, is skipped.
+fn make_style_sections<'a>(
+    line: &'a str,
+    submatches: &[(usize, usize)],
+    match_style: Style,
+    non_match_style: Style,
+) -> StyleSectionSpecifier<'a> {
+    let mut sections = Vec::with_capacity(2 * submatches.len() + 1);
+    // Invariant: `curr` is always 0 or the end of an accepted range, and
+    // therefore always lies on a character boundary within `line`.
+    let mut curr = 0;
+    for &(start, end) in submatches {
+        let start = start.max(curr);
+        let end = end.min(line.len());
+        if start > end {
+            continue;
+        }
+        // `str::get` returns `None` instead of panicking when an offset is
+        // not on a character boundary.
+        let (gap, matched) = match (line.get(curr..start), line.get(start..end)) {
+            (Some(gap), Some(matched)) => (gap, matched),
+            _ => continue,
+        };
+        if !gap.is_empty() {
+            sections.push((non_match_style, gap));
+        }
+        sections.push((match_style, matched));
+        curr = end;
+    }
+    if curr < line.len() {
+        sections.push((non_match_style, &line[curr..]));
+    }
+    StyleSectionSpecifier::StyleSections(sections)
+}
+
+// Build style sections for the code portion of `raw_line` from the colors
+// already present in the raw (ANSI-colored) line: a section with a foreground
+// color gets `match_style`, any other section gets `non_match_style`.
+// Returns None if the start of the code cannot be located in `raw_line`.
+fn get_code_style_sections<'b>(
+    raw_line: &'b str,
+    match_style: Style,
+    non_match_style: Style,
+    grep: &GrepLine,
+) -> Option<StyleSectionSpecifier<'b>> {
+    // Length of the "path<sep>" or "path<sep>N<sep>" prefix preceding the code.
+    let prefix_len = if let Some(n) = grep.line_number {
+        format!("{}:{}:", grep.path, n).len()
+    } else {
+        grep.path.len() + 1
+    };
+    let raw_code_start = ansi::ansi_preserving_index(raw_line, prefix_len)?;
+
+    let mut sections = Vec::new();
+    for (ansi_term_style, text) in ansi::parse_style_sections(&raw_line[raw_code_start..]).iter() {
+        let style = if ansi_term_style.foreground.is_some() {
+            match_style
+        } else {
+            non_match_style
+        };
+        sections.push((style, *text));
+    }
+    Some(StyleSectionSpecifier::StyleSections(sections))
+}
+
+// Choose how grep output is rendered, based on the options of the invoking
+// `git grep` command (when they can be determined).
+fn make_output_config() -> GrepOutputConfig {
+    // Is --function-context (-W) in effect, i.e. is the entire function being
+    // displayed?
+    let function_context = matches!(
+        utils::process::git_grep_command_options(),
+        Some((longs, shorts)) if shorts.contains("-W") || longs.contains("--function-context")
+    );
+
+    // With --function-context we don't render the first line as a header,
+    // since the second line is the true next line and it is more readable to
+    // display these normally. We do add the navigate marker, since match
+    // lines will be surrounded by (many) non-match lines. And, since (many)
+    // successive lines of code are printed, line numbers <100 are padded in
+    // order to maintain code alignment up to line 9999.
+    //
+    // In every other case -- including --show-function (-p), where the
+    // function header is displayed along with the matches within the function
+    // -- the first line is rendered as a header and no navigate marker is
+    // added, since all non-header lines are matches.
+    GrepOutputConfig {
+        render_context_header_as_hunk_header: !function_context,
+        add_navigate_marker_to_matches: function_context,
+        pad_line_number: function_context,
+    }
+}
+
+// Selects which assumption about the file path `make_grep_line_regex` bakes
+// into the regex it builds.
+enum GrepLineRegex {
+ // The file path ends with a file extension.
+ FilePathWithFileExtension,
+ // The file path is matched by a pattern that excludes the separator
+ // characters (see `make_grep_line_regex` for the exact pattern).
+ FilePathWithoutSeparatorCharacters,
+}
+
+lazy_static! {
+ // Tried first by `parse_grep_line`.
+ static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex =
+ make_grep_line_regex(GrepLineRegex::FilePathWithFileExtension);
+}
+
+lazy_static! {
+ // Tried by `parse_grep_line` when the file-extension regex does not match.
+ static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex =
+ make_grep_line_regex(GrepLineRegex::FilePathWithoutSeparatorCharacters);
+}
+
+// Build the regex used to parse a plain-text grep line, under the file-path
+// assumption selected by `regex_variant`.
+//
+// Capture groups of the resulting regex:
+//   1      file path
+//   2 / 3  ":" marker (match line) / optional line number
+//   4 / 5  "-" marker (context line) / optional line number
+//   6 / 7  "=" marker (context header line) / optional line number
+//   8      code (the rest of the line)
+//
+// Panics if the assembled pattern fails to compile.
+//
+// See tests for example grep lines
+fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex {
+ // Grep tools such as `git grep` and `rg` emit lines like the following,
+ // where "xxx" represents arbitrary code. Note that there are 3 possible
+ // "separator characters": ':', '-', '='.
+
+ // The format is ambiguous, but we attempt to parse it.
+
+ // src/co-7-fig.rs:xxx
+ // src/co-7-fig.rs:7:xxx
+ // src/co-7-fig.rs-xxx
+ // src/co-7-fig.rs-7-xxx
+ // src/co-7-fig.rs=xxx
+ // src/co-7-fig.rs=7=xxx
+
+ // Makefile:xxx
+ // Makefile:7:xxx
+ // Makefile-xxx
+ // Makefile-7-xxx
+
+ // Make-7-file:xxx
+ // Make-7-file:7:xxx
+ // Make-7-file-xxx
+ // Make-7-file-7-xxx
+
+ // Pattern for capture group 1 (the file path). The first variant requires
+ // a trailing extension of 1 to 6 characters; the second requires no
+ // extension but excludes the separator characters from the path's leading
+ // and interior characters.
+ let file_path = match regex_variant {
+ GrepLineRegex::FilePathWithFileExtension => {
+ r"
+ ( # 1. file name (colons not allowed)
+ [^:\ ] # a file name cannot start with whitespace
+ [^:]* # anything
+ \.[^.\ :=-]{1,6} # extension
+ )
+ "
+ }
+ GrepLineRegex::FilePathWithoutSeparatorCharacters => {
+ r"
+ ( # 1. file name (colons not allowed)
+ [^:\ =-] # a file name cannot start with whitespace
+ [^:=-]* # anything except separators
+ [^:\ ] # a file name cannot end with whitespace
+ )
+ "
+ }
+ };
+
+ // The pattern uses verbose mode (`(?x)`), so whitespace and `#` comments
+ // inside it are ignored by the regex engine.
+ Regex::new(&format!(
+ "(?x)
+^
+{file_path}
+(?:
+ (
+ : # 2. match marker
+ (?:([0-9]+):)? # 3. optional: line number followed by second match marker
+ )
+ |
+ (
+ - # 4. nomatch marker
+ (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker
+ )
+ |
+ (
+ = # 6. match marker
+ (?:([0-9]+)=)? # 7. optional: line number followed by second header marker
+ )
+)
+(.*) # 8. code (i.e. line contents)
+$
+",
+ file_path = file_path
+ ))
+ .unwrap()
+}
+
+/// Parse a single line of grep output.
+///
+/// A line starting with `{` is handed to the ripgrep JSON parser. Any other
+/// line is tried against the file-extension regex first and then against the
+/// no-separator-characters regex; the first successful parse is returned, or
+/// `None` if neither regex matches.
+pub fn parse_grep_line(line: &str) -> Option<GrepLine> {
+    if line.starts_with('{') {
+        return ripgrep_json::parse_line(line);
+    }
+    let candidates: [&Regex; 2] = [
+        &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION,
+        &*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS,
+    ];
+    for &regex in candidates.iter() {
+        if let Some(grep_line) = _parse_grep_line(regex, line) {
+            return Some(grep_line);
+        }
+    }
+    None
+}
+
+/// Parse `line` using `regex`, which is expected to have the capture-group
+/// layout produced by `make_grep_line_regex`:
+///
+/// - group 1: file path;
+/// - groups 2, 4, 6: the match / context / context-header marker alternatives,
+///   each immediately followed by its optional line-number group (3, 5, 7);
+/// - group 8: the code.
+///
+/// Returns `None` if the regex does not match, or if it lacks any of the
+/// groups above (instead of panicking). A line number that does not fit in a
+/// `usize` is reported as `None`. `submatches` is always `None`.
+pub fn _parse_grep_line<'b>(regex: &Regex, line: &'b str) -> Option<GrepLine<'b>> {
+    // (marker group index, line type); the line-number group is the next index.
+    const MARKER_GROUPS: [(usize, LineType); 3] = [
+        (2, LineType::Match),
+        (4, LineType::Context),
+        (6, LineType::ContextHeader),
+    ];
+
+    let caps = regex.captures(line)?;
+    let path = caps.get(1)?.as_str();
+    // With the regexes built in this file, a successful match implies that
+    // exactly one of the three marker alternatives participated.
+    let (line_type, line_number) = MARKER_GROUPS.iter().find_map(|&(group, line_type)| {
+        caps.get(group)?;
+        let line_number = caps
+            .get(group + 1)
+            .and_then(|m| m.as_str().parse::<usize>().ok());
+        Some((line_type, line_number))
+    })?;
+    let code = caps.get(8)?.as_str();
+
+    Some(GrepLine {
+        path: path.into(),
+        line_number,
+        line_type,
+        code: code.into(),
+        submatches: None,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::handlers::grep::{parse_grep_line, GrepLine, LineType};
+
+    /// Assert that `input` parses to a `GrepLine` with the given fields and no
+    /// submatches.
+    fn assert_parses(
+        input: &str,
+        path: &str,
+        line_number: Option<usize>,
+        line_type: LineType,
+        code: &str,
+    ) {
+        assert_eq!(
+            parse_grep_line(input),
+            Some(GrepLine {
+                path: path.into(),
+                line_number,
+                line_type,
+                code: code.into(),
+                submatches: None,
+            })
+        );
+    }
+
+    // Match lines without line numbers: `path:code`.
+    #[test]
+    fn test_parse_grep_match() {
+        assert_parses(
+            "src/co-7-fig.rs:xxx",
+            "src/co-7-fig.rs",
+            None,
+            LineType::Match,
+            "xxx",
+        );
+        assert_parses(
+            "src/config.rs:use crate::minusplus::MinusPlus;",
+            "src/config.rs",
+            None,
+            LineType::Match,
+            "use crate::minusplus::MinusPlus;",
+        );
+        assert_parses(
+            "src/config.rs: pub line_numbers_style_minusplus: MinusPlus<Style>,",
+            "src/config.rs",
+            None,
+            LineType::Match,
+            " pub line_numbers_style_minusplus: MinusPlus<Style>,",
+        );
+        assert_parses(
+            "src/con-fig.rs:use crate::minusplus::MinusPlus;",
+            "src/con-fig.rs",
+            None,
+            LineType::Match,
+            "use crate::minusplus::MinusPlus;",
+        );
+        assert_parses(
+            "src/con-fig.rs: pub line_numbers_style_minusplus: MinusPlus<Style>,",
+            "src/con-fig.rs",
+            None,
+            LineType::Match,
+            " pub line_numbers_style_minusplus: MinusPlus<Style>,",
+        );
+        assert_parses(
+            "src/de lta.rs:pub fn delta<I>(lines: ByteLines<I>, writer: &mut dyn Write, config: &Config) -> std::io::Result<()>",
+            "src/de lta.rs",
+            None,
+            LineType::Match,
+            "pub fn delta<I>(lines: ByteLines<I>, writer: &mut dyn Write, config: &Config) -> std::io::Result<()>",
+        );
+        assert_parses(
+            "src/de lta.rs: pub fn new(writer: &'a mut dyn Write, config: &'a Config) -> Self {",
+            "src/de lta.rs",
+            None,
+            LineType::Match,
+            " pub fn new(writer: &'a mut dyn Write, config: &'a Config) -> Self {",
+        );
+    }
+
+    // Match lines with line numbers: `path:N:code`.
+    #[test]
+    fn test_parse_grep_n_match() {
+        assert_parses(
+            "src/co-7-fig.rs:7:xxx",
+            "src/co-7-fig.rs",
+            Some(7),
+            LineType::Match,
+            "xxx",
+        );
+        assert_parses(
+            "src/config.rs:21:use crate::minusplus::MinusPlus;",
+            "src/config.rs",
+            Some(21),
+            LineType::Match,
+            "use crate::minusplus::MinusPlus;",
+        );
+        assert_parses(
+            "src/config.rs:95: pub line_numbers_style_minusplus: MinusPlus<Style>,",
+            "src/config.rs",
+            Some(95),
+            LineType::Match,
+            " pub line_numbers_style_minusplus: MinusPlus<Style>,",
+        );
+        assert_parses(
+            "Makefile:10:test: unit-test end-to-end-test",
+            "Makefile",
+            Some(10),
+            LineType::Match,
+            "test: unit-test end-to-end-test",
+        );
+        assert_parses(
+            "Makefile:16: ./tests/test_raw_output_matches_git_on_full_repo_history",
+            "Makefile",
+            Some(16),
+            LineType::Match,
+            " ./tests/test_raw_output_matches_git_on_full_repo_history",
+        );
+    }
+
+    #[test]
+    #[ignore]
+    fn test_parse_grep_n_match_file_name_with_dashes_and_no_extension() {
+        // Known failure: this line cannot currently be parsed.
+        assert_parses(
+            "etc/examples/119-within-line-edits:4:repo=$(mktemp -d)",
+            "etc/examples/119-within-line-edits",
+            Some(4),
+            LineType::Match,
+            "repo=$(mktemp -d)",
+        );
+    }
+
+    // Context (non-matching) lines without line numbers: `path-code`.
+    #[test]
+    fn test_parse_grep_no_match() {
+        assert_parses(
+            "src/co-7-fig.rs-xxx",
+            "src/co-7-fig.rs",
+            None,
+            LineType::Context,
+            "xxx",
+        );
+        assert_parses(
+            "src/config.rs- pub available_terminal_width: usize,",
+            "src/config.rs",
+            None,
+            LineType::Context,
+            " pub available_terminal_width: usize,",
+        );
+        assert_parses(
+            "src/con-fig.rs-use crate::minusplus::MinusPlus;",
+            "src/con-fig.rs",
+            None,
+            LineType::Context,
+            "use crate::minusplus::MinusPlus;",
+        );
+        assert_parses(
+            "de-lta.rs- if self.source == Source::Unknown {",
+            "de-lta.rs",
+            None,
+            LineType::Context,
+            " if self.source == Source::Unknown {",
+        );
+    }
+
+    // Context (non-matching) lines with line numbers: `path-N-code`.
+    #[test]
+    fn test_parse_grep_n_no_match() {
+        assert_parses(
+            "src/co-7-fig.rs-7-xxx",
+            "src/co-7-fig.rs",
+            Some(7),
+            LineType::Context,
+            "xxx",
+        );
+        assert_parses(
+            "src/config.rs-58- pub available_terminal_width: usize,",
+            "src/config.rs",
+            Some(58),
+            LineType::Context,
+            " pub available_terminal_width: usize,",
+        );
+    }
+
+    #[test]
+    fn test_parse_grep_match_no_extension() {
+        assert_parses("Makefile:xxx", "Makefile", None, LineType::Match, "xxx");
+    }
+
+    #[test]
+    fn test_parse_grep_n_match_no_extension() {
+        assert_parses("Makefile:7:xxx", "Makefile", Some(7), LineType::Match, "xxx");
+    }
+
+    #[test]
+    #[allow(non_snake_case)]
+    fn test_parse_grep_W_context_header() {
+        // git grep -W: context header line
+        assert_parses(
+            "src/config.rs=pub struct Config {",
+            "src/config.rs",
+            None,
+            LineType::ContextHeader,
+            "pub struct Config {",
+        );
+    }
+
+    #[test]
+    #[allow(non_snake_case)]
+    fn test_parse_grep_W_n_context_header() {
+        // git grep -n -W: context header line with line number
+        assert_parses(
+            "src/config.rs=57=pub struct Config {",
+            "src/config.rs",
+            Some(57),
+            LineType::ContextHeader,
+            "pub struct Config {",
+        );
+    }
+}