summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2021-05-31 19:00:56 -0400
committerAndrew Gallant <jamslam@gmail.com>2021-05-31 21:51:18 -0400
commitee23ab51736d35028c7f3eee23295429377742b4 (patch)
tree2e43d4ded4f3e76e5b6a8c68c6bc005ec8a326a3
parentefd9cfb2fc1f0233de9eda4c03416d32ef2c3ce8 (diff)
printer: trim line terminator before finding submatches
This fixes a bug where PCRE2 look-around could change the result of a match if it observed a line terminator in the printer. And in particular, this is precisely how the searcher operates: the line is considered unto itself *without* the line terminator. Fixes #1401
-rw-r--r--CHANGELOG.md2
-rw-r--r--crates/printer/src/standard.rs15
-rw-r--r--crates/printer/src/util.rs25
-rw-r--r--tests/regression.rs27
4 files changed, 55 insertions, 14 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ba9bd56..2144c5e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -71,6 +71,8 @@ Bug fixes:
Document cygwin path translation behavior in the FAQ.
* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311):
Fix multi-line bug where a search & replace for `\n` didn't work as expected.
+* [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401):
+ Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`.
* [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412):
Fix multi-line bug with searches using look-around past matching lines.
* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642):
diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs
index 0c853f1b..c4675e43 100644
--- a/crates/printer/src/standard.rs
+++ b/crates/printer/src/standard.rs
@@ -17,7 +17,8 @@ use color::ColorSpecs;
use counter::CounterWriter;
use stats::Stats;
use util::{
- find_iter_at_in_context, trim_ascii_prefix, PrinterPath, Replacer, Sunk,
+ find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator,
+ PrinterPath, Replacer, Sunk,
};
/// The configuration for the standard printer.
@@ -1547,17 +1548,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
}
fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) {
- let lineterm = self.searcher.line_terminator();
- if lineterm.is_suffix(&buf[*line]) {
- let mut end = line.end() - 1;
- if lineterm.is_crlf()
- && end > 0
- && buf.get(end - 1) == Some(&b'\r')
- {
- end -= 1;
- }
- *line = line.with_end(end);
- }
+ trim_line_terminator(&self.searcher, buf, line);
}
fn has_line_terminator(&self, buf: &[u8]) -> bool {
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 37e56529..e19d4a8b 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -418,7 +418,12 @@ where
bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
}
} else {
- bytes = &bytes[..range.end];
+ // When searching a single line, we should remove the line terminator.
+ // Otherwise, it's possible for the regex (via look-around) to observe
+ // the line terminator and not match because of it.
+ let mut m = Match::new(0, range.end);
+ trim_line_terminator(searcher, bytes, &mut m);
+ bytes = &bytes[..m.end()];
}
matcher
.find_iter_at(bytes, range.start, |m| {
@@ -429,3 +434,21 @@ where
})
.map_err(io::Error::error_message)
}
+
+/// Given a buf and some bounds, if there is a line terminator at the end of
+/// the given bounds in buf, then the bounds are trimmed to remove the line
+/// terminator.
+pub fn trim_line_terminator(
+ searcher: &Searcher,
+ buf: &[u8],
+ line: &mut Match,
+) {
+ let lineterm = searcher.line_terminator();
+ if lineterm.is_suffix(&buf[*line]) {
+ let mut end = line.end() - 1;
+ if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
+ end -= 1;
+ }
+ *line = line.with_end(end);
+ }
+}
diff --git a/tests/regression.rs b/tests/regression.rs
index d110c99c..cd14d5e4 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -806,7 +806,32 @@ rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| {
eqnice!("mylink/file.txt:test\n", stdout);
});
-// printf "foo\nbar\n" | rg -PU '(?<=foo\n)bar' -r quux
+// See: https://github.com/BurntSushi/ripgrep/issues/1401
+rgtest!(r1401_look_ahead_only_matching_1, |dir: Dir, mut cmd: TestCommand| {
+ // Only PCRE2 supports look-around.
+ if !dir.is_pcre2() {
+ return;
+ }
+ dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\n");
+ cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]);
+ eqnice!("xo\ncat\tdo\n", cmd.stdout());
+
+ let mut cmd = dir.command();
+ cmd.args(&["-No", r".*o(?!.*[ \t])", "ip.txt"]);
+ eqnice!("xo\ncat\tdo\n", cmd.stdout());
+});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1401
+rgtest!(r1401_look_ahead_only_matching_2, |dir: Dir, mut cmd: TestCommand| {
+ // Only PCRE2 supports look-around.
+ if !dir.is_pcre2() {
+ return;
+ }
+ dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\nfoo");
+ cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]);
+ eqnice!("xo\ncat\tdo\nfoo\n", cmd.stdout());
+});
+
// See: https://github.com/BurntSushi/ripgrep/issues/1412
rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| {
// Only PCRE2 supports look-around.