summaryrefslogtreecommitdiffstats
path: root/grep/src/literals.rs
diff options
context:
space:
mode:
Diffstat (limited to 'grep/src/literals.rs')
-rw-r--r--grep/src/literals.rs119
1 files changed, 59 insertions, 60 deletions
diff --git a/grep/src/literals.rs b/grep/src/literals.rs
index eebeac4c..3e1c385b 100644
--- a/grep/src/literals.rs
+++ b/grep/src/literals.rs
@@ -10,10 +10,8 @@ principled.
use std::cmp;
use regex::bytes::RegexBuilder;
-use syntax::{
- Expr, Literals, Lit,
- ByteClass, ByteRange, CharClass, ClassRange, Repeater,
-};
+use syntax::hir::{self, Hir, HirKind};
+use syntax::hir::literal::{Literal, Literals};
#[derive(Clone, Debug)]
pub struct LiteralSets {
@@ -23,12 +21,12 @@ pub struct LiteralSets {
}
impl LiteralSets {
- pub fn create(expr: &Expr) -> Self {
+ pub fn create(expr: &Hir) -> Self {
let mut required = Literals::empty();
union_required(expr, &mut required);
LiteralSets {
- prefixes: expr.prefixes(),
- suffixes: expr.suffixes(),
+ prefixes: Literals::prefixes(expr),
+ suffixes: Literals::suffixes(expr),
required: required,
}
}
@@ -93,60 +91,52 @@ impl LiteralSets {
}
}
-fn union_required(expr: &Expr, lits: &mut Literals) {
- use syntax::Expr::*;
- match *expr {
- Literal { ref chars, casei: false } => {
- let s: String = chars.iter().cloned().collect();
- lits.cross_add(s.as_bytes());
+fn union_required(expr: &Hir, lits: &mut Literals) {
+ match *expr.kind() {
+ HirKind::Literal(hir::Literal::Unicode(c)) => {
+ let mut buf = [0u8; 4];
+ lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
}
- Literal { ref chars, casei: true } => {
- for &c in chars {
- let cls = CharClass::new(vec![
- ClassRange { start: c, end: c },
- ]).case_fold();
- if !lits.add_char_class(&cls) {
- lits.cut();
- return;
- }
+ HirKind::Literal(hir::Literal::Byte(b)) => {
+ lits.cross_add(&[b]);
+ }
+ HirKind::Class(hir::Class::Unicode(ref cls)) => {
+ if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) {
+ lits.cut();
+ }
+ }
+ HirKind::Class(hir::Class::Bytes(ref cls)) => {
+ if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) {
+ lits.cut();
}
}
- LiteralBytes { ref bytes, casei: false } => {
- lits.cross_add(bytes);
+ HirKind::Group(hir::Group { ref hir, .. }) => {
+ union_required(&**hir, lits);
}
- LiteralBytes { ref bytes, casei: true } => {
- for &b in bytes {
- let cls = ByteClass::new(vec![
- ByteRange { start: b, end: b },
- ]).case_fold();
- if !lits.add_byte_class(&cls) {
+ HirKind::Repetition(ref x) => {
+ match x.kind {
+ hir::RepetitionKind::ZeroOrOne => lits.cut(),
+ hir::RepetitionKind::ZeroOrMore => lits.cut(),
+ hir::RepetitionKind::OneOrMore => {
+ union_required(&x.hir, lits);
lits.cut();
- return;
+ }
+ hir::RepetitionKind::Range(ref rng) => {
+ let (min, max) = match *rng {
+ hir::RepetitionRange::Exactly(m) => (m, Some(m)),
+ hir::RepetitionRange::AtLeast(m) => (m, None),
+ hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
+ };
+ repeat_range_literals(
+ &x.hir, min, max, x.greedy, lits, union_required);
}
}
}
- Class(_) => {
- lits.cut();
- }
- ClassBytes(_) => {
- lits.cut();
+ HirKind::Concat(ref es) if es.is_empty() => {}
+ HirKind::Concat(ref es) if es.len() == 1 => {
+ union_required(&es[0], lits)
}
- Group { ref e, .. } => {
- union_required(&**e, lits);
- }
- Repeat { r: Repeater::ZeroOrOne, .. } => lits.cut(),
- Repeat { r: Repeater::ZeroOrMore, .. } => lits.cut(),
- Repeat { ref e, r: Repeater::OneOrMore, .. } => {
- union_required(&**e, lits);
- lits.cut();
- }
- Repeat { ref e, r: Repeater::Range { min, max }, greedy } => {
- repeat_range_literals(
- &**e, min, max, greedy, lits, union_required);
- }
- Concat(ref es) if es.is_empty() => {}
- Concat(ref es) if es.len() == 1 => union_required(&es[0], lits),
- Concat(ref es) => {
+ HirKind::Concat(ref es) => {
for e in es {
let mut lits2 = lits.to_empty();
union_required(e, &mut lits2);
@@ -157,7 +147,6 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
if lits2.contains_empty() {
lits.cut();
}
- // if !lits.union(lits2) {
if !lits.cross_product(&lits2) {
// If this expression couldn't yield any literal that
// could be extended, then we need to quit. Since we're
@@ -167,15 +156,15 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
}
}
}
- Alternate(ref es) => {
+ HirKind::Alternation(ref es) => {
alternate_literals(es, lits, union_required);
}
_ => lits.cut(),
}
}
-fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
- e: &Expr,
+fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
+ e: &Hir,
min: u32,
max: Option<u32>,
_greedy: bool,
@@ -204,8 +193,8 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
}
}
-fn alternate_literals<F: FnMut(&Expr, &mut Literals)>(
- es: &[Expr],
+fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
+ es: &[Hir],
lits: &mut Literals,
mut f: F,
) {
@@ -234,11 +223,21 @@ fn alternate_literals<F: FnMut(&Expr, &mut Literals)>(
}
lits.cut();
if !lcs.is_empty() {
- lits.add(Lit::empty());
- lits.add(Lit::new(lcs.to_vec()));
+ lits.add(Literal::empty());
+ lits.add(Literal::new(lcs.to_vec()));
}
}
+/// Return the number of characters in the given class.
+fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
+ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
+}
+
+/// Return the number of bytes in the given class.
+fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
+ cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
+}
+
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
fn bytes_to_regex(bs: &[u8]) -> String {