diff options
Diffstat (limited to 'grep-regex/src/matcher.rs')
-rw-r--r-- | grep-regex/src/matcher.rs | 25 |
1 files changed, 23 insertions, 2 deletions
diff --git a/grep-regex/src/matcher.rs b/grep-regex/src/matcher.rs
index 7f30252a..61af0518 100644
--- a/grep-regex/src/matcher.rs
+++ b/grep-regex/src/matcher.rs
@@ -71,10 +71,31 @@ impl RegexMatcherBuilder {
         &self,
         literals: &[B],
     ) -> Result<RegexMatcher, Error> {
-        let slices: Vec<_> = literals.iter().map(|s| s.as_ref()).collect();
-        if !self.config.can_plain_aho_corasick() || literals.len() < 40 {
+        let mut has_escape = false;
+        let mut slices = vec![];
+        for lit in literals {
+            slices.push(lit.as_ref());
+            has_escape = has_escape || lit.as_ref().contains('\\');
+        }
+        // Even when we have a fixed set of literals, we might still want to
+        // use the regex engine. Specifically, if any string has an escape
+        // in it, then we probably can't feed it to Aho-Corasick without
+        // removing the escape. Additionally, if there are any particular
+        // special match semantics we need to honor, that Aho-Corasick isn't
+        // enough. Finally, the regex engine can do really well with a small
+        // number of literals (at time of writing, this is changing soon), so
+        // we use it when there's a small set.
+        //
+        // Yes, this is one giant hack. Ideally, this entirely separate literal
+        // matcher that uses Aho-Corasick would be pushed down into the regex
+        // engine.
+        if has_escape
+            || !self.config.can_plain_aho_corasick()
+            || literals.len() < 40
+        {
             return self.build(&slices.join("|"));
         }
+
         let matcher = MultiLiteralMatcher::new(&slices)?;
         let imp = RegexMatcherImpl::MultiLiteral(matcher);
         Ok(RegexMatcher {