diff options
Diffstat (limited to 'grep/src/word_boundary.rs')
-rw-r--r-- | grep/src/word_boundary.rs | 53 |
1 files changed, 0 insertions, 53 deletions
diff --git a/grep/src/word_boundary.rs b/grep/src/word_boundary.rs deleted file mode 100644 index 8e6b86d1..00000000 --- a/grep/src/word_boundary.rs +++ /dev/null @@ -1,53 +0,0 @@ -use syntax::hir::{self, Hir, HirKind}; - -/// Strips Unicode word boundaries from the given expression. -/// -/// The key invariant this maintains is that the expression returned will match -/// *at least* every where the expression given will match. Namely, a match of -/// the returned expression can report false positives but it will never report -/// false negatives. -/// -/// If no word boundaries could be stripped, then None is returned. -pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option<Hir> { - // The real reason we do this is because Unicode word boundaries are the - // one thing that Rust's regex DFA engine can't handle. When it sees a - // Unicode word boundary among non-ASCII text, it falls back to one of the - // slower engines. We work around this limitation by attempting to use - // a regex to find candidate matches without a Unicode word boundary. We'll - // only then use the full (and slower) regex to confirm a candidate as a - // match or not during search. - // - // It looks like we only check the outer edges for `\b`? I guess this is - // an attempt to optimize for the `-w/--word-regexp` flag? ---AG - match *expr.kind() { - HirKind::Concat(ref es) if !es.is_empty() => { - let first = is_unicode_word_boundary(&es[0]); - let last = is_unicode_word_boundary(es.last().unwrap()); - // Be careful not to strip word boundaries if there are no other - // expressions to match. - match (first, last) { - (true, false) if es.len() > 1 => { - Some(Hir::concat(es[1..].to_vec())) - } - (false, true) if es.len() > 1 => { - Some(Hir::concat(es[..es.len() - 1].to_vec())) - } - (true, true) if es.len() > 2 => { - Some(Hir::concat(es[1..es.len() - 1].to_vec())) - } - _ => None, - } - } - _ => None, - } -} - -/// Returns true if the given expression is a Unicode word boundary. -fn is_unicode_word_boundary(expr: &Hir) -> bool { - match *expr.kind() { - HirKind::WordBoundary(hir::WordBoundary::Unicode) => true, - HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true, - HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir), - _ => false, - } -} |