diff options
-rw-r--r-- | query-grammar/src/occur.rs | 2 | ||||
-rwxr-xr-x | src/lib.rs | 14 | ||||
-rw-r--r-- | src/query/boolean_query/boolean_query.rs | 110 | ||||
-rw-r--r-- | src/query/phrase_query/phrase_query.rs | 4 |
4 files changed, 118 insertions, 12 deletions
diff --git a/query-grammar/src/occur.rs b/query-grammar/src/occur.rs index dfe70c5..25c889f 100644 --- a/query-grammar/src/occur.rs +++ b/query-grammar/src/occur.rs @@ -2,7 +2,7 @@ use std::fmt; use std::fmt::Write; /// Defines whether a term in a query must be present, -/// should be present or must not be present. +/// should be present or must be not present. #[derive(Debug, Clone, Hash, Copy, Eq, PartialEq)] pub enum Occur { /// For a given document to be considered for scoring, @@ -212,15 +212,13 @@ pub type Score = f32; pub type SegmentLocalId = u32; impl DocAddress { - /// Return the segment ordinal. - /// The segment ordinal is an id identifying the segment - /// hosting the document. It is only meaningful, in the context - /// of a searcher. + /// Return the segment ordinal id that identifies the segment + /// hosting the document in the `Searcher` it is called from. pub fn segment_ord(self) -> SegmentLocalId { self.0 } - /// Return the segment local `DocId` + /// Return the segment-local `DocId` pub fn doc(self) -> DocId { self.1 } @@ -229,11 +227,11 @@ impl DocAddress { /// `DocAddress` contains all the necessary information /// to identify a document given a `Searcher` object. /// -/// It consists in an id identifying its segment, and -/// its segment-local `DocId`. +/// It consists of an id identifying its segment, and +/// a segment-local `DocId`. /// /// The id used for the segment is actually an ordinal -/// in the list of segment hold by a `Searcher`. +/// in the list of `Segment`s held by a `Searcher`. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] pub struct DocAddress(pub SegmentLocalId, pub DocId); diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs index 00658aa..54d3673 100644 --- a/src/query/boolean_query/boolean_query.rs +++ b/src/query/boolean_query/boolean_query.rs @@ -9,7 +9,8 @@ use crate::Result; use crate::Searcher; use std::collections::BTreeSet; -/// The boolean query combines a set of queries +/// The boolean query returns a set of documents +/// that matches the Boolean combination of constituent subqueries. /// /// The documents matched by the boolean query are /// those which @@ -19,6 +20,113 @@ use std::collections::BTreeSet; /// `MustNot` occurence. /// * match at least one of the subqueries that is not /// a `MustNot` occurence. +/// +/// +/// You can combine other query types and their `Occur`ances into one `BooleanQuery` +/// +/// ```rust +///use tantivy::collector::Count; +///use tantivy::doc; +///use tantivy::query::{BooleanQuery, Occur, PhraseQuery, Query, TermQuery}; +///use tantivy::schema::{IndexRecordOption, Schema, TEXT}; +///use tantivy::Term; +///use tantivy::{Index, Result}; +/// +///fn main() -> Result<()> { +/// let mut schema_builder = Schema::builder(); +/// let title = schema_builder.add_text_field("title", TEXT); +/// let body = schema_builder.add_text_field("body", TEXT); +/// let schema = schema_builder.build(); +/// let index = Index::create_in_ram(schema); +/// { +/// let mut index_writer = index.writer(3_000_000)?; +/// index_writer.add_document(doc!( +/// title => "The Name of the Wind", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of Muadib", +/// )); +/// index_writer.add_document(doc!( +/// title => "A Dairy Cow", +/// body => "hidden", +/// )); +/// index_writer.add_document(doc!( +/// title => "A Dairy Cow", +/// body => "found", +/// )); +/// index_writer.add_document(doc!( +/// title => "The Diary of a Young Girl", +/// )); 
+/// index_writer.commit().unwrap(); +/// } +/// +/// let reader = index.reader()?; +/// let searcher = reader.searcher(); +/// +/// // Make `TermQuery`s for "girl" and "diary" in the title +/// let girl_term_query: Box<dyn Query> = Box::new(TermQuery::new( +/// Term::from_field_text(title, "girl"), +/// IndexRecordOption::Basic, +/// )); +/// let diary_term_query: Box<dyn Query> = Box::new(TermQuery::new( +/// Term::from_field_text(title, "diary"), +/// IndexRecordOption::Basic, +/// )); +/// // A TermQuery with "found" in the body +/// let body_term_query: Box<dyn Query> = Box::new(TermQuery::new( +/// Term::from_field_text(body, "found"), +/// IndexRecordOption::Basic, +/// )); +/// // `TermQuery` "diary" must be present and "girl" must not be present +/// let queries_with_occurs1 = vec![ +/// (Occur::Must, diary_term_query.box_clone()), +/// (Occur::MustNot, girl_term_query), +/// ]; +/// // Make a BooleanQuery equivalent to +/// // title:+diary title:-girl +/// let diary_must_and_girl_mustnot = BooleanQuery::from(queries_with_occurs1); +/// let count1 = searcher.search(&diary_must_and_girl_mustnot, &Count)?; +/// assert_eq!(count1, 1); +/// +/// // TermQuery for "cow" in the title +/// let cow_term_query: Box<dyn Query> = Box::new(TermQuery::new( +/// Term::from_field_text(title, "cow"), +/// IndexRecordOption::Basic, +/// )); +/// // "title:diary OR title:cow" +/// let title_diary_or_cow = BooleanQuery::from(vec![ +/// (Occur::Should, diary_term_query.box_clone()), +/// (Occur::Should, cow_term_query), +/// ]); +/// let count2 = searcher.search(&title_diary_or_cow, &Count)?; +/// assert_eq!(count2, 4); +/// +/// // Make a `PhraseQuery` from a vector of `Term`s +/// let phrase_query: Box<dyn Query> = Box::new(PhraseQuery::new(vec![ +/// Term::from_field_text(title, "dairy"), +/// Term::from_field_text(title, "cow"), +/// ])); +/// // You can combine subqueries of different types into one `BooleanQuery`: +/// // `TermQuery` and `PhraseQuery` +/// // title:diary OR "dairy cow" 
+/// let term_of_phrase_query = BooleanQuery::from(vec![ +/// (Occur::Should, diary_term_query.box_clone()), +/// (Occur::Should, phrase_query.box_clone()), +/// ]); +/// let count3 = searcher.search(&term_of_phrase_query, &Count)?; +/// assert_eq!(count3, 4); +/// +/// // You can nest one BooleanQuery inside another +/// // body:found AND (title:diary OR "dairy cow") +/// let nested_query = BooleanQuery::from(vec![ +/// (Occur::Must, body_term_query), +/// (Occur::Must, Box::new(term_of_phrase_query)) +/// ]); +/// let count4 = searcher.search(&nested_query, &Count)?; +/// assert_eq!(count4, 1); +/// Ok(()) +///} +/// ``` #[derive(Debug)] pub struct BooleanQuery { subqueries: Vec<(Occur, Box<dyn Query>)>, diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs index 8c1126e..be95b32 100644 --- a/src/query/phrase_query/phrase_query.rs +++ b/src/query/phrase_query/phrase_query.rs @@ -40,7 +40,7 @@ impl PhraseQuery { PhraseQuery::new_with_offset(terms_with_offset) } - /// Creates a new `PhraseQuery` given a list of terms and there offsets. + /// Creates a new `PhraseQuery` given a list of terms and their offsets. /// /// Can be used to provide custom offset for each term. pub fn new_with_offset(mut terms: Vec<(usize, Term)>) -> PhraseQuery { @@ -73,7 +73,7 @@ impl PhraseQuery { .collect::<Vec<Term>>() } - /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`. + /// Returns the `PhraseWeight` for the given phrase query given a specific `searcher`. /// /// This function is the same as `.weight(...)` except it returns /// a specialized type `PhraseWeight` instead of a Boxed trait. |