summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2021-06-12 14:16:35 -0400
committerAndrew Gallant <jamslam@gmail.com>2021-06-12 14:18:53 -0400
commitbc76a30c23317526441a76f99ff397f65acea607 (patch)
treebd8fccdedac9718026f49443c336b9a09de23614
parent5e81c60b35ce1481cdce5aa933b808faa71bc90e (diff)
regex: fix -w when regex can match empty string
This is a weird bug where our optimization for handling -w more quickly than we would otherwise failed. In particular, if the original regex can match the empty string, then our word boundary detection would produce invalid indices to start the next search at. We "fix" it by simply bailing when the indices are known to be incorrect. This wasn't a problem in a previous release since ripgrep 13 tweaked how word boundaries are detected in commit efd9cfb2. Fixes #1891
-rw-r--r--crates/matcher/src/lib.rs4
-rw-r--r--crates/regex/src/word.rs11
-rw-r--r--tests/regression.rs8
3 files changed, 19 insertions, 4 deletions
diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs
index 92365efb..5b43b0d8 100644
--- a/crates/matcher/src/lib.rs
+++ b/crates/matcher/src/lib.rs
@@ -116,7 +116,7 @@ impl Match {
/// This method panics if `start > self.end`.
#[inline]
pub fn with_start(&self, start: usize) -> Match {
- assert!(start <= self.end);
+ assert!(start <= self.end, "{} is not <= {}", start, self.end);
Match { start, ..*self }
}
@@ -128,7 +128,7 @@ impl Match {
/// This method panics if `self.start > end`.
#[inline]
pub fn with_end(&self, end: usize) -> Match {
- assert!(self.start <= end);
+ assert!(self.start <= end, "{} is not <= {}", self.start, end);
Match { end, ..*self }
}
diff --git a/crates/regex/src/word.rs b/crates/regex/src/word.rs
index d73dd6ca..aa08164b 100644
--- a/crates/regex/src/word.rs
+++ b/crates/regex/src/word.rs
@@ -111,8 +111,15 @@ impl WordMatcher {
}
let (_, slen) = bstr::decode_utf8(&haystack[cand]);
let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
- cand =
- cand.with_start(cand.start() + slen).with_end(cand.end() - elen);
+ let new_start = cand.start() + slen;
+ let new_end = cand.end() - elen;
+ // This occurs when the original regex can match the empty string. In this
+ // case, just bail instead of trying to get it right here since it's
+ // likely a pathological case.
+ if new_start > new_end {
+ return Err(());
+ }
+ cand = cand.with_start(new_start).with_end(new_end);
if self.original.is_match(&haystack[cand]) {
Ok(Some(cand))
} else {
diff --git a/tests/regression.rs b/tests/regression.rs
index c905e736..e35e953f 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -1029,3 +1029,11 @@ rgtest!(r1878, |dir: Dir, _: TestCommand| {
let args = &["-U", "--mmap", r"\Abaz", "test"];
dir.command().args(args).assert_err();
});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1891
+rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| {
+ dir.create("test", "\n##\n");
+ // N.B. We use -o here to force the issue to occur, which seems to only
+ // happen when each match needs to be detected.
+ eqnice!("1:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout());
+});