diff options
Diffstat (limited to 'src/snippet/mod.rs')
-rw-r--r-- | src/snippet/mod.rs | 55 |
1 files changed, 24 insertions, 31 deletions
diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index fddecce..1f602c9 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -63,7 +63,7 @@ impl FragmentCandidate { fn try_add_token(&mut self, token: &Token, terms: &BTreeMap<String, f32>) { self.stop_offset = token.offset_to; - if let Some(score) = terms.get(&token.text.to_lowercase()) { + if let Some(&score) = terms.get(&token.text.to_lowercase()) { self.score += score; self.highlighted .push(HighlightSection::new(token.offset_from, token.offset_to)); @@ -142,7 +142,7 @@ impl Snippet { /// Fragments must be valid in the sense that `&text[fragment.start..fragment.stop]`\ /// has to be a valid string. fn search_fragments<'a>( - tokenizer: &dyn BoxedTokenizer, + tokenizer: &BoxedTokenizer, text: &'a str, terms: &BTreeMap<String, f32>, max_num_chars: usize, @@ -150,7 +150,6 @@ fn search_fragments<'a>( let mut token_stream = tokenizer.token_stream(text); let mut fragment = FragmentCandidate::new(0); let mut fragments: Vec<FragmentCandidate> = vec![]; - while let Some(next) = token_stream.next() { if (next.offset_to - fragment.start_offset) > max_num_chars { if fragment.score > 0.0 { @@ -214,11 +213,9 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str) /// # Example /// /// ```rust -/// # #[macro_use] -/// # extern crate tantivy; -/// # use tantivy::Index; -/// # use tantivy::schema::{Schema, TEXT}; /// # use tantivy::query::QueryParser; +/// # use tantivy::schema::{Schema, TEXT}; +/// # use tantivy::{doc, Index}; /// use tantivy::SnippetGenerator; /// /// # fn main() -> tantivy::Result<()> { @@ -254,7 +251,7 @@ fn select_best_fragment_combination(fragments: &[FragmentCandidate], text: &str) /// ``` pub struct SnippetGenerator { terms_text: BTreeMap<String, f32>, - tokenizer: Box<dyn BoxedTokenizer>, + tokenizer: BoxedTokenizer, field: Field, max_num_chars: usize, } @@ -316,12 +313,8 @@ impl SnippetGenerator { /// Generates a snippet for the given text. pub fn snippet(&self, text: &str) -> Snippet { - let fragment_candidates = search_fragments( - &*self.tokenizer, - &text, - &self.terms_text, - self.max_num_chars, - ); + let fragment_candidates = + search_fragments(&self.tokenizer, &text, &self.terms_text, self.max_num_chars); select_best_fragment_combination(&fragment_candidates[..], &text) } } @@ -331,7 +324,7 @@ mod tests { use super::{search_fragments, select_best_fragment_combination}; use crate::query::QueryParser; use crate::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions, TEXT}; - use crate::tokenizer::{box_tokenizer, SimpleTokenizer}; + use crate::tokenizer::SimpleTokenizer; use crate::Index; use crate::SnippetGenerator; use maplit::btreemap; @@ -355,12 +348,12 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let terms = btreemap! { String::from("rust") => 1.0, String::from("language") => 0.9 }; - let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 100); + let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 100); assert_eq!(fragments.len(), 7); { let first = &fragments[0]; @@ -382,13 +375,13 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_scored_fragment() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); { let terms = btreemap! { String::from("rust") =>1.0f32, String::from("language") => 0.9f32 }; - let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20); + let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20); { let first = &fragments[0]; assert_eq!(first.score, 1.0); @@ -397,13 +390,13 @@ Survey in 2016, 2017, and 2018."#; let snippet = select_best_fragment_combination(&fragments[..], &TEST_TEXT); assert_eq!(snippet.to_html(), "<b>Rust</b> is a systems") } - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); { let terms = btreemap! { String::from("rust") =>0.9f32, String::from("language") => 1.0f32 }; - let fragments = search_fragments(&*boxed_tokenizer, TEST_TEXT, &terms, 20); + let fragments = search_fragments(&boxed_tokenizer, TEST_TEXT, &terms, 20); //assert_eq!(fragments.len(), 7); { let first = &fragments[0]; @@ -417,14 +410,14 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_in_second_fragment() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let text = "a b c d e f g"; let mut terms = BTreeMap::new(); terms.insert(String::from("c"), 1.0); - let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); + let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 1); { @@ -441,14 +434,14 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_with_term_at_the_end_of_fragment() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let text = "a b c d e f f g"; let mut terms = BTreeMap::new(); terms.insert(String::from("f"), 1.0); - let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); + let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 2); { @@ -465,7 +458,7 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_with_second_fragment_has_the_highest_score() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let text = "a b c d e f g"; @@ -473,7 +466,7 @@ Survey in 2016, 2017, and 2018."#; terms.insert(String::from("f"), 1.0); terms.insert(String::from("a"), 0.9); - let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 7); + let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 7); assert_eq!(fragments.len(), 2); { @@ -490,14 +483,14 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_with_term_not_in_text() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let text = "a b c d"; let mut terms = BTreeMap::new(); terms.insert(String::from("z"), 1.0); - let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); + let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 0); @@ -508,12 +501,12 @@ Survey in 2016, 2017, and 2018."#; #[test] fn test_snippet_with_no_terms() { - let boxed_tokenizer = box_tokenizer(SimpleTokenizer); + let boxed_tokenizer = SimpleTokenizer.into(); let text = "a b c d"; let terms = BTreeMap::new(); - let fragments = search_fragments(&*boxed_tokenizer, &text, &terms, 3); + let fragments = search_fragments(&boxed_tokenizer, &text, &terms, 3); assert_eq!(fragments.len(), 0); let snippet = select_best_fragment_combination(&fragments[..], &text); |