summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- grep-regex/src/matcher.rs 25
-rw-r--r-- tests/regression.rs 10
2 files changed, 33 insertions, 2 deletions
diff --git a/grep-regex/src/matcher.rs b/grep-regex/src/matcher.rs
index 7f30252a..61af0518 100644
--- a/grep-regex/src/matcher.rs
+++ b/grep-regex/src/matcher.rs
@@ -71,10 +71,31 @@ impl RegexMatcherBuilder {
&self,
literals: &[B],
) -> Result<RegexMatcher, Error> {
- let slices: Vec<_> = literals.iter().map(|s| s.as_ref()).collect();
- if !self.config.can_plain_aho_corasick() || literals.len() < 40 {
+ let mut has_escape = false;
+ let mut slices = vec![];
+ for lit in literals {
+ slices.push(lit.as_ref());
+ has_escape = has_escape || lit.as_ref().contains('\\');
+ }
+ // Even when we have a fixed set of literals, we might still want to
+ // use the regex engine. Specifically, if any string has an escape
+ // in it, then we probably can't feed it to Aho-Corasick without
+ // removing the escape. Additionally, if there are any particular
+ // special match semantics we need to honor, then Aho-Corasick isn't
+ // enough. Finally, the regex engine can do really well with a small
+ // number of literals (at time of writing, this is changing soon), so
+ // we use it when there's a small set.
+ //
+ // Yes, this is one giant hack. Ideally, this entirely separate literal
+ // matcher that uses Aho-Corasick would be pushed down into the regex
+ // engine.
+ if has_escape
+ || !self.config.can_plain_aho_corasick()
+ || literals.len() < 40
+ {
return self.build(&slices.join("|"));
}
+
let matcher = MultiLiteralMatcher::new(&slices)?;
let imp = RegexMatcherImpl::MultiLiteral(matcher);
Ok(RegexMatcher {
diff --git a/tests/regression.rs b/tests/regression.rs
index 40a84654..88f2194d 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -716,3 +716,13 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| {
cmd = dir.command();
eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout());
});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1334
+rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| {
+ dir.create("patterns", &"1.208.0.0/12\n".repeat(40));
+ dir.create("corpus", "1.208.0.0/12\n");
+ eqnice!(
+ "1.208.0.0/12\n",
+ cmd.arg("-Ff").arg("patterns").arg("corpus").stdout()
+ );
+});