summary | refs | log | tree | commit | diff | stats
path: root/grep-printer
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2019-01-26 12:25:21 -0500
committer Andrew Gallant <jamslam@gmail.com> 2019-01-26 12:34:28 -0500
commit 9d703110cfe01782d2d0b03a340f5983da215e68 (patch)
tree a3d7e184ba2753485733f92c31cf820bd9b9fe74 /grep-printer
parent e99b6bda0ef469ddeb65fb2f39e0c47b64c20bda (diff)
regex: make CRLF hack more robust
This commit improves the CRLF hack to be more robust. In particular, in addition to rewriting `$` as `(?:\r??$)`, we now strip `\r` from the end of a match if and only if the regex has an ending line anchor required for a match. This doesn't quite make the hack 100% correct, but should fix most use cases in practice. An example of a regex that will still be incorrect is `foo|bar$`, since the analysis isn't quite sophisticated enough to determine that a `\r` can be safely stripped from any match. Even if we fix that, regexes like `foo\r|bar$` still won't be handled correctly. Alas, more work on this front should really be focused on enabling this in the regex engine itself. The specific cause of this bug was that grep-searcher was sneakily stripping CRLF from matching lines when it really shouldn't have. We remove that code now, and instead rely on better match semantics provided at a lower level. Fixes #1095
Diffstat (limited to 'grep-printer')
-rw-r--r-- grep-printer/src/json.rs 44
1 file changed, 43 insertions, 1 deletion
diff --git a/grep-printer/src/json.rs b/grep-printer/src/json.rs
index 45d6d682..50387bf8 100644
--- a/grep-printer/src/json.rs
+++ b/grep-printer/src/json.rs
@@ -817,7 +817,8 @@ impl<'a> SubMatches<'a> {
#[cfg(test)]
mod tests {
- use grep_regex::RegexMatcher;
+ use grep_regex::{RegexMatcher, RegexMatcherBuilder};
+ use grep_matcher::LineTerminator;
use grep_searcher::SearcherBuilder;
use super::{JSON, JSONBuilder};
@@ -918,4 +919,45 @@ and exhibited clearly, with a label attached.\
assert_eq!(got.lines().count(), 2);
assert!(got.contains("begin") && got.contains("end"));
}
+
+ #[test]
+ fn missing_crlf() {
+ let haystack = "test\r\n".as_bytes();
+
+ let matcher = RegexMatcherBuilder::new()
+ .build("test")
+ .unwrap();
+ let mut printer = JSONBuilder::new()
+ .build(vec![]);
+ SearcherBuilder::new()
+ .build()
+ .search_reader(&matcher, haystack, printer.sink(&matcher))
+ .unwrap();
+ let got = printer_contents(&mut printer);
+ assert_eq!(got.lines().count(), 3);
+ assert!(
+ got.lines().nth(1).unwrap().contains(r"test\r\n"),
+ r"missing 'test\r\n' in '{}'",
+ got.lines().nth(1).unwrap(),
+ );
+
+ let matcher = RegexMatcherBuilder::new()
+ .crlf(true)
+ .build("test")
+ .unwrap();
+ let mut printer = JSONBuilder::new()
+ .build(vec![]);
+ SearcherBuilder::new()
+ .line_terminator(LineTerminator::crlf())
+ .build()
+ .search_reader(&matcher, haystack, printer.sink(&matcher))
+ .unwrap();
+ let got = printer_contents(&mut printer);
+ assert_eq!(got.lines().count(), 3);
+ assert!(
+ got.lines().nth(1).unwrap().contains(r"test\r\n"),
+ r"missing 'test\r\n' in '{}'",
+ got.lines().nth(1).unwrap(),
+ );
+ }
}