diff options
Diffstat (limited to 'src/query/regex_query.rs')
-rw-r--r-- | src/query/regex_query.rs | 102 |
1 files changed, 74 insertions, 28 deletions
diff --git a/src/query/regex_query.rs b/src/query/regex_query.rs index 3296a8d..14c281d 100644 --- a/src/query/regex_query.rs +++ b/src/query/regex_query.rs @@ -4,22 +4,18 @@ use crate::schema::Field; use crate::Result; use crate::Searcher; use std::clone::Clone; +use std::sync::Arc; use tantivy_fst::Regex; -// A Regex Query matches all of the documents +/// A Regex Query matches all of the documents /// containing a specific term that matches -/// a regex pattern -/// A Fuzzy Query matches all of the documents -/// containing a specific term that is within -/// Levenshtein distance +/// a regex pattern. /// /// ```rust -/// #[macro_use] -/// extern crate tantivy; -/// use tantivy::schema::{Schema, TEXT}; -/// use tantivy::{Index, Result, Term}; /// use tantivy::collector::Count; /// use tantivy::query::RegexQuery; +/// use tantivy::schema::{Schema, TEXT}; +/// use tantivy::{doc, Index, Result, Term}; /// /// # fn main() { example().unwrap(); } /// fn example() -> Result<()> { @@ -48,7 +44,7 @@ use tantivy_fst::Regex; /// let searcher = reader.searcher(); /// /// let term = Term::from_field_text(title, "Diary"); -/// let query = RegexQuery::new("d[ai]{2}ry".to_string(), title); +/// let query = RegexQuery::from_pattern("d[ai]{2}ry", title)?; /// let count = searcher.search(&query, &Count)?; /// assert_eq!(count, 3); /// Ok(()) @@ -56,30 +52,34 @@ use tantivy_fst::Regex; /// ``` #[derive(Debug, Clone)] pub struct RegexQuery { - regex_pattern: String, + regex: Arc<Regex>, field: Field, } impl RegexQuery { - /// Creates a new Fuzzy Query - pub fn new(regex_pattern: String, field: Field) -> RegexQuery { + /// Creates a new RegexQuery from a given pattern + pub fn from_pattern(regex_pattern: &str, field: Field) -> Result<Self> { + let regex = Regex::new(®ex_pattern) + .map_err(|_| TantivyError::InvalidArgument(regex_pattern.to_string()))?; + Ok(RegexQuery::from_regex(regex, field)) + } + + /// Creates a new RegexQuery from a fully built Regex + pub fn from_regex<T: Into<Arc<Regex>>>(regex: T, field: Field) -> Self { RegexQuery { - regex_pattern, + regex: regex.into(), field, } } - fn specialized_weight(&self) -> Result<AutomatonWeight<Regex>> { - let automaton = Regex::new(&self.regex_pattern) - .map_err(|_| TantivyError::InvalidArgument(self.regex_pattern.clone()))?; - - Ok(AutomatonWeight::new(self.field, automaton)) + fn specialized_weight(&self) -> AutomatonWeight<Regex> { + AutomatonWeight::new(self.field, self.regex.clone()) } } impl Query for RegexQuery { fn weight(&self, _searcher: &Searcher, _scoring_enabled: bool) -> Result<Box<dyn Weight>> { - Ok(Box::new(self.specialized_weight()?)) + Ok(Box::new(self.specialized_weight())) } } @@ -87,13 +87,14 @@ impl Query for RegexQuery { mod test { use super::RegexQuery; use crate::collector::TopDocs; - use crate::schema::Schema; use crate::schema::TEXT; + use crate::schema::{Field, Schema}; use crate::tests::assert_nearly_equals; - use crate::Index; + use crate::{Index, IndexReader}; + use std::sync::Arc; + use tantivy_fst::Regex; - #[test] - pub fn test_regex_query() { + fn build_test_index() -> (IndexReader, Field) { let mut schema_builder = Schema::builder(); let country_field = schema_builder.add_text_field("country", TEXT); let schema = schema_builder.build(); @@ -109,20 +110,65 @@ mod test { index_writer.commit().unwrap(); } let reader = index.reader().unwrap(); + + (reader, country_field) + } + + fn verify_regex_query( + query_matching_one: RegexQuery, + query_matching_zero: RegexQuery, + reader: IndexReader, + ) { let searcher = reader.searcher(); { - let regex_query = RegexQuery::new("jap[ao]n".to_string(), country_field); let scored_docs = searcher - .search(®ex_query, &TopDocs::with_limit(2)) + .search(&query_matching_one, &TopDocs::with_limit(2)) .unwrap(); assert_eq!(scored_docs.len(), 1, "Expected only 1 document"); let (score, _) = scored_docs[0]; assert_nearly_equals(1f32, score); } - let regex_query = RegexQuery::new("jap[A-Z]n".to_string(), country_field); let top_docs = searcher - .search(®ex_query, &TopDocs::with_limit(2)) + .search(&query_matching_zero, &TopDocs::with_limit(2)) .unwrap(); assert!(top_docs.is_empty(), "Expected ZERO document"); } + + #[test] + pub fn test_regex_query() { + let (reader, field) = build_test_index(); + + let matching_one = RegexQuery::from_pattern("jap[ao]n", field).unwrap(); + let matching_zero = RegexQuery::from_pattern("jap[A-Z]n", field).unwrap(); + + verify_regex_query(matching_one, matching_zero, reader); + } + + #[test] + pub fn test_construct_from_regex() { + let (reader, field) = build_test_index(); + + let matching_one = RegexQuery::from_regex(Regex::new("jap[ao]n").unwrap(), field); + let matching_zero = RegexQuery::from_regex(Regex::new("jap[A-Z]n").unwrap(), field); + + verify_regex_query(matching_one, matching_zero, reader); + } + + #[test] + pub fn test_construct_from_reused_regex() { + let r1 = Arc::new(Regex::new("jap[ao]n").unwrap()); + let r2 = Arc::new(Regex::new("jap[A-Z]n").unwrap()); + + let (reader, field) = build_test_index(); + + let matching_one = RegexQuery::from_regex(r1.clone(), field); + let matching_zero = RegexQuery::from_regex(r2.clone(), field); + + verify_regex_query(matching_one, matching_zero, reader.clone()); + + let matching_one = RegexQuery::from_regex(r1.clone(), field); + let matching_zero = RegexQuery::from_regex(r2.clone(), field); + + verify_regex_query(matching_one, matching_zero, reader.clone()); + } } |