diff options
author | Andrew Gallant <jamslam@gmail.com> | 2021-05-31 19:00:56 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2021-05-31 21:51:18 -0400 |
commit | ee23ab51736d35028c7f3eee23295429377742b4 (patch) | |
tree | 2e43d4ded4f3e76e5b6a8c68c6bc005ec8a326a3 | |
parent | efd9cfb2fc1f0233de9eda4c03416d32ef2c3ce8 (diff) |
printer: trim line terminator before finding submatches
This fixes a bug where PCRE2 look-around could change the result of a
match if it observed a line terminator in the printer. And in
particular, this is precisely how the searcher operates: the line is
considered unto itself *without* the line terminator.
Fixes #1401
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | crates/printer/src/standard.rs | 15 | ||||
-rw-r--r-- | crates/printer/src/util.rs | 25 | ||||
-rw-r--r-- | tests/regression.rs | 27 |
4 files changed, 55 insertions, 14 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ba9bd56..2144c5e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,6 +71,8 @@ Bug fixes: Document cygwin path translation behavior in the FAQ. * [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311): Fix multi-line bug where a search & replace for `\n` didn't work as expected. +* [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401): + Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`. * [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412): Fix multi-line bug with searches using look-around past matching lines. * [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642): diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 0c853f1b..c4675e43 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -17,7 +17,8 @@ use color::ColorSpecs; use counter::CounterWriter; use stats::Stats; use util::{ - find_iter_at_in_context, trim_ascii_prefix, PrinterPath, Replacer, Sunk, + find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, + PrinterPath, Replacer, Sunk, }; /// The configuration for the standard printer. @@ -1547,17 +1548,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { } fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) { - let lineterm = self.searcher.line_terminator(); - if lineterm.is_suffix(&buf[*line]) { - let mut end = line.end() - 1; - if lineterm.is_crlf() - && end > 0 - && buf.get(end - 1) == Some(&b'\r') - { - end -= 1; - } - *line = line.with_end(end); - } + trim_line_terminator(&self.searcher, buf, line); } fn has_line_terminator(&self, buf: &[u8]) -> bool { diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 37e56529..e19d4a8b 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -418,7 +418,12 @@ where bytes = &bytes[..range.end + MAX_LOOK_AHEAD]; } } else { - bytes = &bytes[..range.end]; + // When searching a single line, we should remove the line terminator. + // Otherwise, it's possible for the regex (via look-around) to observe + // the line terminator and not match because of it. + let mut m = Match::new(0, range.end); + trim_line_terminator(searcher, bytes, &mut m); + bytes = &bytes[..m.end()]; } matcher .find_iter_at(bytes, range.start, |m| { @@ -429,3 +434,21 @@ where }) .map_err(io::Error::error_message) } + +/// Given a buf and some bounds, if there is a line terminator at the end of +/// the given bounds in buf, then the bounds are trimmed to remove the line +/// terminator. +pub fn trim_line_terminator( + searcher: &Searcher, + buf: &[u8], + line: &mut Match, +) { + let lineterm = searcher.line_terminator(); + if lineterm.is_suffix(&buf[*line]) { + let mut end = line.end() - 1; + if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') { + end -= 1; + } + *line = line.with_end(end); + } +} diff --git a/tests/regression.rs b/tests/regression.rs index d110c99c..cd14d5e4 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -806,7 +806,32 @@ rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| { eqnice!("mylink/file.txt:test\n", stdout); }); -// printf "foo\nbar\n" | rg -PU '(?<=foo\n)bar' -r quux +// See: https://github.com/BurntSushi/ripgrep/issues/1401 +rgtest!(r1401_look_ahead_only_matching_1, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. + if !dir.is_pcre2() { + return; + } + dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\n"); + cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); + eqnice!("xo\ncat\tdo\n", cmd.stdout()); + + let mut cmd = dir.command(); + cmd.args(&["-No", r".*o(?!.*[ \t])", "ip.txt"]); + eqnice!("xo\ncat\tdo\n", cmd.stdout()); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/1401 +rgtest!(r1401_look_ahead_only_matching_2, |dir: Dir, mut cmd: TestCommand| { + // Only PCRE2 supports look-around. + if !dir.is_pcre2() { + return; + } + dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\nfoo"); + cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); + eqnice!("xo\ncat\tdo\nfoo\n", cmd.stdout()); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/1412 rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| { // Only PCRE2 supports look-around. |