summary | refs | log | tree | commit | diff | stats
path: root/crates
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com>, 2021-05-29 07:34:14 -0400
committer: Andrew Gallant <jamslam@gmail.com>, 2021-05-29 07:37:28 -0400
commit: 581a35e568c3acd32461d276a4cfe746524e17cd (patch)
tree: ef3de275dfeb8a0f93db684157d03dadcd6c5386 /crates
parent: ba965962fe2fc3513aeeaa99665f09099d92045d (diff)
impl: fix --multiline anchored match bug
This fixes a bug where using \A or (?-m)^ in combination with -U/--multiline would permit matches that aren't anchored to the beginning of the file. The underlying cause was an optimization that occurred when mmaps couldn't be used. Namely, ripgrep still tries to read the input incrementally if it knows the pattern can't match through a new line. But the detection logic was flawed, since it didn't account for line anchors. This commit fixes that.

Fixes #1878, Fixes #1879
Diffstat (limited to 'crates')
-rw-r--r-- crates/regex/src/non_matching.rs 5
1 file changed, 4 insertions, 1 deletion
diff --git a/crates/regex/src/non_matching.rs b/crates/regex/src/non_matching.rs
index 2270f94d..e2e0755b 100644
--- a/crates/regex/src/non_matching.rs
+++ b/crates/regex/src/non_matching.rs
@@ -13,7 +13,10 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
/// the given expression.
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
match *expr.kind() {
- HirKind::Empty | HirKind::Anchor(_) | HirKind::WordBoundary(_) => {}
+ HirKind::Empty | HirKind::WordBoundary(_) => {}
+ HirKind::Anchor(_) => {
+ set.remove(b'\n');
+ }
HirKind::Literal(hir::Literal::Unicode(c)) => {
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
set.remove(b);