summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Davison <dandavison7@gmail.com>2022-01-05 16:51:00 -0500
committerGitHub <noreply@github.com>2022-01-05 16:51:00 -0500
commit040ad767418b37d57c3215cbf6af2dfb9f0b6813 (patch)
tree145274951211698b7e2101592ac0db92980ef716
parent48fec2e6fdede01ed32f28c9083bdc42ef300615 (diff)
Fix grep parse bugs (#865)
Evolve grep output parsing heuristics * Demand a non-space before extension * New grep parse heuristic If something like any of the following are seen then that is assumed to be a file name with an extension followed by a line number. I.e. we do not support file names with such patterns internally. .xx-7- .xx=7= .xx:7:
-rw-r--r--src/handlers/grep.rs143
1 files changed, 119 insertions, 24 deletions
diff --git a/src/handlers/grep.rs b/src/handlers/grep.rs
index c3d6da6c..3bff9c44 100644
--- a/src/handlers/grep.rs
+++ b/src/handlers/grep.rs
@@ -288,18 +288,30 @@ fn make_output_config() -> GrepOutputConfig {
}
enum GrepLineRegex {
- FilePathWithFileExtension,
- FilePathWithoutSeparatorCharacters,
+ WithFileExtensionAndLineNumber,
+ WithFileExtension,
+ WithFileExtensionNoSpaces,
+ WithoutSeparatorCharacters,
+}
+
+lazy_static! {
+ static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER: Regex =
+ make_grep_line_regex(GrepLineRegex::WithFileExtensionAndLineNumber);
+}
+
+lazy_static! {
+ static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES: Regex =
+ make_grep_line_regex(GrepLineRegex::WithFileExtensionNoSpaces);
}
lazy_static! {
static ref GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION: Regex =
- make_grep_line_regex(GrepLineRegex::FilePathWithFileExtension);
+ make_grep_line_regex(GrepLineRegex::WithFileExtension);
}
lazy_static! {
static ref GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS: Regex =
- make_grep_line_regex(GrepLineRegex::FilePathWithoutSeparatorCharacters);
+ make_grep_line_regex(GrepLineRegex::WithoutSeparatorCharacters);
}
// See tests for example grep lines
@@ -328,16 +340,24 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex {
// Make-7-file-7-xxx
let file_path = match regex_variant {
- GrepLineRegex::FilePathWithFileExtension => {
+ GrepLineRegex::WithFileExtensionAndLineNumber | GrepLineRegex::WithFileExtension => {
r"
( # 1. file name (colons not allowed)
[^:|\ ] # try to be strict about what a file path can start with
[^:]* # anything
- \.[^.\ :=-]{1,6} # extension
+ [^\ ]\.[^.\ :=-]{1,6} # extension
)
"
}
- GrepLineRegex::FilePathWithoutSeparatorCharacters => {
+ GrepLineRegex::WithFileExtensionNoSpaces => {
+ r"
+ ( # 1. file name (colons not allowed)
+ [^:|\ ]+ # try to be strict about what a file path can start with
+ [^\ ]\.[^.\ :=-]{1,6} # extension
+ )
+ "
+ }
+ GrepLineRegex::WithoutSeparatorCharacters => {
r"
( # 1. file name (colons not allowed)
[^:|\ =-] # try to be strict about what a file path can start with
@@ -348,30 +368,59 @@ fn make_grep_line_regex(regex_variant: GrepLineRegex) -> Regex {
}
};
+ let separator = match regex_variant {
+ GrepLineRegex::WithFileExtensionAndLineNumber => {
+ r#"
+ (?:
+ (
+ : # 2. match marker
+ ([0-9]+): # 3. line number followed by second match marker
+ )
+ |
+ (
+ - # 4. nomatch marker
+ ([0-9]+)- # 5. line number followed by second nomatch marker
+ )
+ |
+ (
+ = # 6. match marker
+ ([0-9]+)= # 7. line number followed by second header marker
+ )
+ )
+ "#
+ }
+ _ => {
+ r#"
+ (?:
+ (
+ : # 2. match marker
+ (?:([0-9]+):)? # 3. optional: line number followed by second match marker
+ )
+ |
+ (
+ - # 4. nomatch marker
+ (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker
+ )
+ |
+ (
+ = # 6. match marker
+ (?:([0-9]+)=)? # 7. optional: line number followed by second header marker
+ )
+ )
+ "#
+ }
+ };
+
Regex::new(&format!(
"(?x)
^
{file_path}
-(?:
- (
- : # 2. match marker
- (?:([0-9]+):)? # 3. optional: line number followed by second match marker
- )
- |
- (
- - # 4. nomatch marker
- (?:([0-9]+)-)? # 5. optional: line number followed by second nomatch marker
- )
- |
- (
- = # 6. match marker
- (?:([0-9]+)=)? # 7. optional: line number followed by second header marker
- )
-)
+{separator}
(.*) # 8. code (i.e. line contents)
$
",
- file_path = file_path
+ file_path = file_path,
+ separator = separator,
))
.unwrap()
}
@@ -382,6 +431,8 @@ pub fn parse_grep_line(line: &str) -> Option<GrepLine> {
} else {
match &*process::calling_process() {
process::CallingProcess::GitGrep(_) | process::CallingProcess::OtherGrep => [
+ &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_AND_LINE_NUMBER,
+ &*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION_NO_SPACES,
&*GREP_LINE_REGEX_ASSUMING_FILE_EXTENSION,
&*GREP_LINE_REGEX_ASSUMING_NO_INTERNAL_SEPARATOR_CHARS,
]
@@ -654,6 +705,28 @@ mod tests {
submatches: None,
})
);
+
+ assert_eq!(
+ parse_grep_line(r#"aaa/bbb.scala- s"xxx.yyy.zzz: $ccc ddd""#),
+ Some(GrepLine {
+ path: "aaa/bbb.scala".into(),
+ line_number: None,
+ line_type: LineType::Context,
+ code: r#" s"xxx.yyy.zzz: $ccc ddd""#.into(),
+ submatches: None,
+ })
+ );
+
+ assert_eq!(
+ parse_grep_line(r#"aaa/bbb.scala- val atRegex = Regex.compile("(@.*)|(-shdw@.*)""#),
+ Some(GrepLine {
+ path: "aaa/bbb.scala".into(),
+ line_number: None,
+ line_type: LineType::Context,
+ code: r#" val atRegex = Regex.compile("(@.*)|(-shdw@.*)""#.into(),
+ submatches: None,
+ })
+ );
}
#[test]
@@ -684,6 +757,28 @@ mod tests {
submatches: None,
})
);
+
+ assert_eq!(
+ parse_grep_line(r#"foo.rs-12- .x-"#),
+ Some(GrepLine {
+ path: "foo.rs".into(),
+ line_number: Some(12),
+ line_type: LineType::Context,
+ code: r#" .x-"#.into(),
+ submatches: None,
+ })
+ );
+
+ assert_eq!(
+ parse_grep_line(r#"foo.rs-12-.x-"#),
+ Some(GrepLine {
+ path: "foo.rs".into(),
+ line_number: Some(12),
+ line_type: LineType::Context,
+ code: r#".x-"#.into(),
+ submatches: None,
+ })
+ );
}
#[test]