diff options
author | Dan Davison <dandavison7@gmail.com> | 2022-01-05 16:51:00 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-05 16:51:00 -0500 |
commit | 040ad767418b37d57c3215cbf6af2dfb9f0b6813 (patch) | |
tree | 145274951211698b7e2101592ac0db92980ef716 | |
parent | 48fec2e6fdede01ed32f28c9083bdc42ef300615 (diff) |
Fix grep parse bugs (#865)
Evolve grep output parsing heuristics
* Demand a non-space before extension
* New grep parse heuristic
If any of the following patterns is seen, it is assumed to be a file
name with an extension followed by a line number. I.e. we do not
support file names that contain such patterns internally.
.xx-7-
.xx=7=
.xx:7:
-rw-r--r-- | src/handlers/grep.rs | 143 |
1 files changed, 119 insertions, 24 deletions
diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs index c3d6da6c..3bff9c44 100644 --- a/src/handlers/grep.rs +++ b/src/handlers/grep.rs @@ -288,18 +288,30 @@ fn make_output_config() -> GrepOutputConfig { } enum GrepLineRegex { - FilePathWithFileExtension, - FilePathWithoutSeparatorCharacters, + WithFileExtensionAndLineNumber, + WithFileExtension, + WithFileExtensionNoSpaces, + WithoutSeparatorCharacters, +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER: Regex = + make_grep_line_regex(GrepLineRegex::WithFileExtensionAndLineNumber); +} + +lazy_static! { + static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES: Regex = + make_grep_line_regex(GrepLineRegex::WithFileExtensionNoSpaces); } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex = - make_grep_line_regex(GrepLineRegex::FilePathWithFileExtension); + make_grep_line_regex(GrepLineRegex::WithFileExtension); } lazy_static! { static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex = - make_grep_line_regex(GrepLineRegex::FilePathWithoutSeparatorCharacters); + make_grep_line_regex(GrepLineRegex::WithoutSeparatorCharacters); } // See tests for example grep lines @@ -328,16 +340,24 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { // Make-7-file-7-xxx let file_path = match regex_variant { - GrepLineRegex::FilePathWithFileExtension => { + GrepLineRegex::WithFileExtensionAndLineNumber | GrepLineRegex::WithFileExtension => { r" ( # 1. file name (colons not allowed) [^:|\ ] # try to be strict about what a file path can start with [^:]* # anything - \.[^.\ :=-]{1,6} # extension + [^\ ]\.[^.\ :=-]{1,6} # extension ) " } - GrepLineRegex::FilePathWithoutSeparatorCharacters => { + GrepLineRegex::WithFileExtensionNoSpaces => { + r" + ( # 1. 
file name (colons not allowed) + [^:|\ ]+ # try to be strict about what a file path can start with + [^\ ]\.[^.\ :=-]{1,6} # extension + ) + " + } + GrepLineRegex::WithoutSeparatorCharacters => { r" ( # 1. file name (colons not allowed) [^:|\ =-] # try to be strict about what a file path can start with @@ -348,30 +368,59 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex { } }; + let separator = match regex_variant { + GrepLineRegex::WithFileExtensionAndLineNumber => { + r#" + (?: + ( + : # 2. match marker + ([0-9]+): # 3. line number followed by second match marker + ) + | + ( + - # 4. nomatch marker + ([0-9]+)- # 5. line number followed by second nomatch marker + ) + | + ( + = # 6. match marker + ([0-9]+)= # 7. line number followed by second header marker + ) + ) + "# + } + _ => { + r#" + (?: + ( + : # 2. match marker + (?:([0-9]+):)? # 3. optional: line number followed by second match marker + ) + | + ( + - # 4. nomatch marker + (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker + ) + | + ( + = # 6. match marker + (?:([0-9]+)=)? # 7. optional: line number followed by second header marker + ) + ) + "# + } + }; + Regex::new(&format!( "(?x) ^ {file_path} -(?: - ( - : # 2. match marker - (?:([0-9]+):)? # 3. optional: line number followed by second match marker - ) - | - ( - - # 4. nomatch marker - (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker - ) - | - ( - = # 6. match marker - (?:([0-9]+)=)? # 7. optional: line number followed by second header marker - ) -) +{separator} (.*) # 8. code (i.e. 
line contents) $ ", - file_path = file_path + file_path = file_path, + separator = separator, )) .unwrap() } @@ -382,6 +431,8 @@ pub fn parse_grep_line(line: &str) -> Option<GrepLine> { } else { match &*process::calling_process() { process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep => [ + &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER, + &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES, &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION, &*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS, ] @@ -654,6 +705,28 @@ mod tests { submatches: None, }) ); + + assert_eq!( + parse_grep_line(r#"aaa/bbb.scala- s"xxx.yyy.zzz: $ccc ddd""#), + Some(GrepLine { + path: "aaa/bbb.scala".into(), + line_number: None, + line_type: LineType::Context, + code: r#" s"xxx.yyy.zzz: $ccc ddd""#.into(), + submatches: None, + }) + ); + + assert_eq!( + parse_grep_line(r#"aaa/bbb.scala- val atRegex = Regex.compile("(@.*)|(-shdw@.*)""#), + Some(GrepLine { + path: "aaa/bbb.scala".into(), + line_number: None, + line_type: LineType::Context, + code: r#" val atRegex = Regex.compile("(@.*)|(-shdw@.*)""#.into(), + submatches: None, + }) + ); } #[test] @@ -684,6 +757,28 @@ mod tests { submatches: None, }) ); + + assert_eq!( + parse_grep_line(r#"foo.rs-12- .x-"#), + Some(GrepLine { + path: "foo.rs".into(), + line_number: Some(12), + line_type: LineType::Context, + code: r#" .x-"#.into(), + submatches: None, + }) + ); + + assert_eq!( + parse_grep_line(r#"foo.rs-12-.x-"#), + Some(GrepLine { + path: "foo.rs".into(), + line_number: Some(12), + line_type: LineType::Context, + code: r#".x-"#.into(), + submatches: None, + }) + ); } #[test] |