// This module is currently a prototype. It was inspired a bit by a similar
// implementation in codesearch[1]. The main difference is that it isn't fixed
// to a particular ngram size and isn't quite as smart about reducing memory
// usage. This was hastily written in order to bootstrap ngram extraction so
// that other things in this crate could be worked on.
//
// Moving forward, we should polish this module up and probably generate
// ngrams via graphemes or at least codepoints. The main issue with doing that
// is that it will likely severely hinder our indexing speed. But alas, not
// enough is built yet to let us quantify the performance difference.
//
// [1] - https://github.com/google/codesearch
use std::cmp;
use std::mem;
use bstr::{BString, ByteSlice, ByteVec};
use regex_syntax::hir::{self, Hir, HirKind};
#[derive(Clone)]
pub enum GramQuery {
Literal(BString),
And(Vec<GramQuery>),
Or(Vec<GramQuery>),
}
impl GramQuery {
fn nothing() -> GramQuery {
GramQuery::Or(vec![])
}
fn anything() -> GramQuery {
GramQuery::And(vec![])
}
fn is_nothing(&self) -> bool {
match *self {
GramQuery::Or(ref qs) => qs.is_empty(),
_ => false,
}
}
fn is_anything(&self) -> bool {
match *self {
GramQuery::And(ref qs) => qs.is_empty(),
_ => false,
}
}
fn is_literal(&self) -> bool {
match *self {
GramQuery::Literal(..) => true,
_ => false,
}
}
fn is_and(&self) -> bool {
match *self {
GramQuery::And(..) => true,
_ => false,
}
}
fn is_or(&self) -> bool {
match *self {
GramQuery::Or(..) => true,
_ => false,
}
}
fn unwrap(self) -> GramQuery {
match self {
q @ GramQuery::Literal(_) => q,
GramQuery::And(qs) => GramQuery::and(qs),
GramQuery::Or(qs) => GramQuery::or(qs),
}
}
fn unwrap_literal(self) -> BString {
match self {
GramQuery::Literal(lit) => lit,
GramQuery::And(_) => panic!("expected literal, but got And"),
GramQuery::Or(_) => panic!("expected literal, but got Or"),
}
}
fn or(mut queries: Vec<GramQuery>) -> GramQuery {
queries.retain(|q| !q.is_nothing());
if queries.iter().any(GramQuery::is_anything) {
GramQuery::anything()
} else if queries.len() == 1 {
queries.pop().unwrap()
} else if queries.iter().all(GramQuery::is_literal) {
let set = LiteralSet::from_vec(
queries.into_iter().map(GramQuery::unwrap_literal).collect(),
);
GramQuery::from_set_or(set)
} else if queries.iter().any(GramQuery::is_or) {
let mut flat = vec![];
for q in queries {
match q {
GramQuery::Or(qs) => {
flat.extend(qs);
}
q => flat.push(q),
}
}
GramQuery::or(flat)
} else {
GramQuery::Or(queries)
}
}
fn and(mut queries: Vec<GramQuery>) -> GramQuery {
queries.retain(|q| !q.is_anything());
if queries.iter().any(GramQuery::is_nothing) {
GramQuery::nothing()
} else if queries.len() == 1 {
queries.pop().unwrap()
}