summary refs log tree commit diff stats
path: root/src/query
diff options
context:
space:
mode:
author Paul Masurel <paul.masurel@gmail.com> 2016-11-03 14:28:14 +0900
committer Paul Masurel <paul.masurel@gmail.com> 2016-11-03 14:28:14 +0900
commit a2c6ec93e0e70ca12b634dc9e657ee5f73e7b3f6 (patch)
tree fc1e4f168918d8cbb64ed759c4fb92fcceccfe19 /src/query
parent 59d1b9e2bbf464e0aa677b2f3e25409906494a21 (diff)
issue/50 Fixed VecPostings... Changed intersections.
Diffstat (limited to 'src/query')
-rw-r--r-- src/query/boolean_query/boolean_scorer.rs 73
-rw-r--r-- src/query/boolean_query/mod.rs 71
-rw-r--r-- src/query/multi_term_query/multi_term_query.rs 7
-rw-r--r-- src/query/phrase_query/mod.rs 63
-rw-r--r-- src/query/phrase_query/phrase_query.rs 5
-rw-r--r-- src/query/phrase_query/phrase_scorer.rs 43
-rw-r--r-- src/query/phrase_query/phrase_weight.rs 4
-rw-r--r-- src/query/term_query/term_query.rs 4
-rw-r--r-- src/query/term_query/term_weight.rs 5
9 files changed, 189 insertions, 86 deletions
diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs
index 8f35cdc..4be2cd7 100644
--- a/src/query/boolean_query/boolean_scorer.rs
+++ b/src/query/boolean_query/boolean_scorer.rs
@@ -77,6 +77,10 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
}
}
+ /// Returns the number of entries in `self.postings`.
+ ///
+ /// Presumably this is one entry per sub-scorer handed to the constructor —
+ /// TODO confirm against `BooleanScorer::new`.
+ pub fn num_subscorers(&self) -> usize {
+ self.postings.len()
+ }
+
/// Advances the head of our heap (the segment postings with the lowest doc)
/// It will also update the new current `DocId` as well as the term frequency
@@ -148,72 +152,3 @@ impl<TScorer: Scorer> Scorer for BooleanScorer<TScorer> {
}
}
-
-
-
-#[cfg(test)]
-mod tests {
-
- use super::*;
- use postings::{DocSet, VecPostings};
- use query::Scorer;
- use query::OccurFilter;
- use query::term_query::TermScorer;
- use query::Occur;
- use fastfield::{U32FastFieldReader};
-
- fn abs_diff(left: f32, right: f32) -> f32 {
- (right - left).abs()
- }
-
- #[test]
- pub fn test_boolean_scorer() {
- let occurs = vec!(Occur::Should, Occur::Should);
- let occur_filter = OccurFilter::new(&occurs);
-
- let left_fieldnorms = U32FastFieldReader::from(vec!(100,200,300));
-
- let left = VecPostings::from(vec!(1, 2, 3));
- let left_scorer = TermScorer {
- idf: 1f32,
- fieldnorm_reader: left_fieldnorms,
- postings: left,
- };
-
- let right_fieldnorms = U32FastFieldReader::from(vec!(15,25,35));
- let right = VecPostings::from(vec!(1, 3, 8));
-
- let right_scorer = TermScorer {
- idf: 4f32,
- fieldnorm_reader: right_fieldnorms,
- postings: right,
- };
-
- let mut boolean_scorer = BooleanScorer::new(vec!(left_scorer, right_scorer), occur_filter);
- assert_eq!(boolean_scorer.next(), Some(1u32));
- assert!(abs_diff(boolean_scorer.score(), 0.8707107) < 0.001);
- assert_eq!(boolean_scorer.next(), Some(2u32));
- assert!(abs_diff(boolean_scorer.score(), 0.028867513) < 0.001f32);
- assert_eq!(boolean_scorer.next(), Some(3u32));
- assert_eq!(boolean_scorer.next(), Some(8u32));
- assert!(abs_diff(boolean_scorer.score(), 0.5163978) < 0.001f32);
- assert!(!boolean_scorer.advance());
- }
-
-
- #[test]
- pub fn test_term_scorer() {
- let left_fieldnorms = U32FastFieldReader::from(vec!(10, 4));
- assert_eq!(left_fieldnorms.get(0), 10);
- assert_eq!(left_fieldnorms.get(1), 4);
- let left = VecPostings::from(vec!(1));
- let mut left_scorer = TermScorer {
- idf: 0.30685282,
- fieldnorm_reader: left_fieldnorms,
- postings: left,
- };
- left_scorer.advance();
- assert!(abs_diff(left_scorer.score(), 0.15342641) < 0.001f32);
- }
-
-}
diff --git a/src/query/boolean_query/mod.rs b/src/query/boolean_query/mod.rs
index 3f19cb9..43d0e33 100644
--- a/src/query/boolean_query/mod.rs
+++ b/src/query/boolean_query/mod.rs
@@ -7,4 +7,73 @@ mod score_combiner;
pub use self::boolean_query::BooleanQuery;
pub use self::boolean_clause::BooleanClause;
pub use self::boolean_scorer::BooleanScorer;
-pub use self::score_combiner::ScoreCombiner;
\ No newline at end of file
+pub use self::score_combiner::ScoreCombiner;
+
+
+
+// Unit tests for `BooleanScorer` and `TermScorer`, built on in-memory
+// `VecPostings` and `U32FastFieldReader` fixtures.
+#[cfg(test)]
+mod tests {
+
+ use super::*;
+ use postings::{DocSet, VecPostings};
+ use query::Scorer;
+ use query::OccurFilter;
+ use query::term_query::TermScorer;
+ use query::Occur;
+ use fastfield::{U32FastFieldReader};
+
+ /// Absolute difference between two `f32` values; used to compare scores
+ /// against an expected value within a tolerance.
+ fn abs_diff(left: f32, right: f32) -> f32 {
+ (right - left).abs()
+ }
+
+ /// Combines two `Should` term scorers (postings `[1, 2, 3]` and `[1, 3, 8]`)
+ /// and checks that the boolean scorer yields docs 1, 2, 3, 8 in that order,
+ /// with the expected scores on docs 1, 2 and 8.
+ #[test]
+ pub fn test_boolean_scorer() {
+ let occurs = vec!(Occur::Should, Occur::Should);
+ let occur_filter = OccurFilter::new(&occurs);
+
+ let left_fieldnorms = U32FastFieldReader::from(vec!(100,200,300));
+
+ let left = VecPostings::from(vec!(1, 2, 3));
+ let left_scorer = TermScorer {
+ idf: 1f32,
+ fieldnorm_reader: left_fieldnorms,
+ postings: left,
+ };
+
+ let right_fieldnorms = U32FastFieldReader::from(vec!(15,25,35));
+ let right = VecPostings::from(vec!(1, 3, 8));
+
+ let right_scorer = TermScorer {
+ idf: 4f32,
+ fieldnorm_reader: right_fieldnorms,
+ postings: right,
+ };
+
+ let mut boolean_scorer = BooleanScorer::new(vec!(left_scorer, right_scorer), occur_filter);
+ assert_eq!(boolean_scorer.next(), Some(1u32));
+ assert!(abs_diff(boolean_scorer.score(), 0.8707107) < 0.001);
+ assert_eq!(boolean_scorer.next(), Some(2u32));
+ assert!(abs_diff(boolean_scorer.score(), 0.028867513) < 0.001f32);
+ // Doc 3 is only checked for presence; its score is not asserted.
+ assert_eq!(boolean_scorer.next(), Some(3u32));
+ assert_eq!(boolean_scorer.next(), Some(8u32));
+ assert!(abs_diff(boolean_scorer.score(), 0.5163978) < 0.001f32);
+ // After doc 8 `advance` must report that nothing is left.
+ assert!(!boolean_scorer.advance());
+ }
+
+
+ /// Checks the fieldnorm fixture reads back the values it was built from,
+ /// then checks the score of a single-posting `TermScorer` after one `advance`.
+ #[test]
+ pub fn test_term_scorer() {
+ let left_fieldnorms = U32FastFieldReader::from(vec!(10, 4));
+ assert_eq!(left_fieldnorms.get(0), 10);
+ assert_eq!(left_fieldnorms.get(1), 4);
+ let left = VecPostings::from(vec!(1));
+ let mut left_scorer = TermScorer {
+ idf: 0.30685282,
+ fieldnorm_reader: left_fieldnorms,
+ postings: left,
+ };
+ left_scorer.advance();
+ assert!(abs_diff(left_scorer.score(), 0.15342641) < 0.001f32);
+ }
+
+}
diff --git a/src/query/multi_term_query/multi_term_query.rs b/src/query/multi_term_query/multi_term_query.rs
index f7298a6..c5e4108 100644
--- a/src/query/multi_term_query/multi_term_query.rs
+++ b/src/query/multi_term_query/multi_term_query.rs
@@ -8,6 +8,7 @@ use core::searcher::Searcher;
use query::occur::Occur;
use query::occur_filter::OccurFilter;
use query::term_query::TermQuery;
+use postings::SegmentPostingsOption;
/// Query involving one or more terms.
@@ -36,7 +37,11 @@ impl MultiTermQuery {
.collect();
let occur_filter = OccurFilter::new(&occurs);
let weights = term_queries.iter()
- .map(|term_query| term_query.specialized_weight(searcher))
+ .map(|term_query| {
+ let mut term_weight = term_query.specialized_weight(searcher);
+ term_weight.segment_postings_options = SegmentPostingsOption::FreqAndPositions;
+ term_weight
+ })
.collect();
MultiTermWeight {
weights: weights,
diff --git a/src/query/phrase_query/mod.rs b/src/query/phrase_query/mod.rs
index 763e72b..12fe1f6 100644
--- a/src/query/phrase_query/mod.rs
+++ b/src/query/phrase_query/mod.rs
@@ -4,4 +4,65 @@ mod phrase_scorer;
pub use self::phrase_query::PhraseQuery;
pub use self::phrase_weight::PhraseWeight;
-pub use self::phrase_scorer::PhraseScorer;
\ No newline at end of file
+pub use self::phrase_scorer::PhraseScorer;
+
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+    use query::Query;
+    use core::Index;
+    use schema::{Document, Term, SchemaBuilder, TEXT};
+    use collector::tests::TestCollector;
+
+
+    /// Indexes four documents that all contain the phrase "a b" and checks
+    /// that a `PhraseQuery` for `["a", "b"]` collects every one of them.
+    ///
+    /// The assertion expects doc ids `0..=3`, so all four documents have to
+    /// be indexed; with a single document only id 0 can ever be returned.
+    #[test]
+    pub fn test_phrase_query() {
+        let mut schema_builder = SchemaBuilder::default();
+        let text_field = schema_builder.add_text_field("text", TEXT);
+        let schema = schema_builder.build();
+        let index = Index::create_in_ram(schema);
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
+            // Each text contains "a" immediately followed by "b" at least once.
+            let doc_texts = ["a b b d c g c", "a b a b c", "c a b a d ga a", "a b c"];
+            for &doc_text in doc_texts.iter() {
+                let mut doc = Document::default();
+                doc.add_text(text_field, doc_text);
+                index_writer.add_document(doc).unwrap();
+            }
+            assert!(index_writer.commit().is_ok());
+        }
+        let mut test_collector = TestCollector::default();
+        // Builds a phrase query from the given words, in order.
+        let build_query = |texts: Vec<&str>| {
+            let terms: Vec<Term> = texts
+                .iter()
+                .map(|text| {
+                    Term::from_field_text(text_field, text)
+                })
+                .collect();
+            PhraseQuery::from(terms)
+        };
+        let phrase_query = build_query(vec!("a", "b"));
+        let searcher = index.searcher();
+        phrase_query.search(&*searcher, &mut test_collector).expect("search should succeed");
+        assert_eq!(test_collector.docs(), vec!(0, 1, 2, 3));
+    }
+
+
+}
diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs
index 790e243..9058f96 100644
--- a/src/query/phrase_query/phrase_query.rs
+++ b/src/query/phrase_query/phrase_query.rs
@@ -33,8 +33,9 @@ impl Query for PhraseQuery {
}
-impl PhraseQuery {
- pub fn new(terms: Vec<Term>) -> PhraseQuery {
+
+impl From<Vec<Term>> for PhraseQuery {
+ fn from(terms: Vec<Term>) -> PhraseQuery {
assert!(terms.len() > 1);
let occur_terms: Vec<(Occur, Term)> = terms.into_iter()
.map(|term| (Occur::Must, term))
diff --git a/src/query/phrase_query/phrase_scorer.rs b/src/query/phrase_query/phrase_scorer.rs
index 4f3285b..b80422b 100644
--- a/src/query/phrase_query/phrase_scorer.rs
+++ b/src/query/phrase_query/phrase_scorer.rs
@@ -7,24 +7,51 @@ use postings::Postings;
use DocId;
+ /// Scorer that walks `all_term_scorer` and keeps only the documents for
+ /// which `phrase_match` succeeds.
+ ///
+ /// `positions_offsets[i]` is the value `phrase_match` adds to the positions
+ /// of the i-th sub-scorer before comparing them across sub-scorers, so it
+ /// needs one entry per sub-scorer.
pub struct PhraseScorer<'a> {
- pub all_term_scorer: BooleanScorer<TermScorer<SegmentPostings<'a>>>
+ pub all_term_scorer: BooleanScorer<TermScorer<SegmentPostings<'a>>>,
+ pub positions_offsets: Vec<u32>,
}
impl<'a> PhraseScorer<'a> {
+    /// Tells whether the sub-scorers' current positions line up as a phrase.
+    ///
+    /// Every position of sub-scorer `i` is shifted by `positions_offsets[i]`.
+    /// The phrase matches when one shifted value is present in the position
+    /// list of every sub-scorer. The search keeps a candidate value `cur`,
+    /// drops positions that fall behind it, and restarts from the first
+    /// sub-scorer whenever a list's head overshoots the candidate.
+    ///
+    /// Assumes each position slice is sorted in increasing order — TODO confirm.
+    ///
+    /// Returns `false` as soon as one sub-scorer has no position left.
+    /// Panics if `positions_offsets` has fewer entries than there are sub-scorers.
     fn phrase_match(&self) -> bool {
-        let scorers = self.all_term_scorer.scorers();
-        for scorer in scorers {
-            let positions = scorer.postings().positions();
+        // Remaining (not yet discarded) positions of each sub-scorer.
+        let mut positions_arr: Vec<&[u32]> = self.all_term_scorer
+            .scorers()
+            .iter()
+            .map(|scorer| scorer.postings().positions())
+            .collect();
+        let mut cur = 0;
+        'outer: loop {
+            for i in 0..positions_arr.len() {
+                let offset = self.positions_offsets[i];
+                let positions: &mut &[u32] = &mut positions_arr[i];
+                // Discard positions that lie before the candidate once shifted.
+                // The head is re-read on every iteration: comparing a value
+                // computed once before the loop would never see the new head.
+                while !positions.is_empty() && positions[0] + offset < cur {
+                    *positions = &(*positions)[1..];
+                }
+                if positions.is_empty() {
+                    return false;
+                }
+                let head_position = positions[0] + offset;
+                if head_position != cur {
+                    // The head overshoots the candidate: adopt it as the new
+                    // candidate and re-check every sub-scorer against it.
+                    cur = head_position;
+                    continue 'outer;
+                }
+            }
+            // Every sub-scorer has a position equal to `cur` once shifted.
+            return true;
         }
-        true
-        // self.all_term_scorer.positions();
-        // let positions =
-
     }
}
impl<'a> DocSet for PhraseScorer<'a> {
fn advance(&mut self,) -> bool {
+ println!("docset advance");
while self.all_term_scorer.advance() {
if self.phrase_match() {
return true;
diff --git a/src/query/phrase_query/phrase_weight.rs b/src/query/phrase_query/phrase_weight.rs
index bd8f271..b3d1e00 100644
--- a/src/query/phrase_query/phrase_weight.rs
+++ b/src/query/phrase_query/phrase_weight.rs
@@ -20,8 +20,10 @@ impl From<MultiTermWeight> for PhraseWeight {
impl Weight for PhraseWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
let all_term_scorer = try!(self.all_term_weight.specialized_scorer(reader));
+ let positions_offsets: Vec<u32> = (0u32..all_term_scorer.num_subscorers() as u32).collect();
Ok(box PhraseScorer {
- all_term_scorer: all_term_scorer
+ all_term_scorer: all_term_scorer,
+ positions_offsets: positions_offsets
})
}
}
diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs
index 11536ac..3ec748c 100644
--- a/src/query/term_query/term_query.rs
+++ b/src/query/term_query/term_query.rs
@@ -3,6 +3,7 @@ use Result;
use super::term_weight::TermWeight;
use query::Query;
use query::Weight;
+use postings::SegmentPostingsOption;
use Searcher;
use std::any::Any;
@@ -31,7 +32,8 @@ impl TermQuery {
TermWeight {
num_docs: searcher.num_docs(),
doc_freq: searcher.doc_freq(&self.term),
- term: self.term.clone()
+ term: self.term.clone(),
+ segment_postings_options: SegmentPostingsOption::NoFreq,
}
}
}
diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs
index 9637d1a..9d7bac3 100644
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -11,7 +11,8 @@ use Result;
+ /// Data needed to build a `TermScorer` for a single term.
+ ///
+ /// `TermQuery` fills `num_docs` from `searcher.num_docs()` and `doc_freq`
+ /// from `searcher.doc_freq(&term)`. `segment_postings_options` is forwarded
+ /// to `reader.read_postings`; `TermQuery` initialises it to `NoFreq` and
+ /// `MultiTermQuery` overrides it with `FreqAndPositions`.
+ ///
+ /// NOTE(review): postings used to be read with `SegmentPostingsOption::Freq`
+ /// unconditionally; confirm the `NoFreq` default does not drop term
+ /// frequencies that scoring relies on.
pub struct TermWeight {
pub num_docs: u32,
pub doc_freq: u32,
- pub term: Term,
+ pub term: Term,
+ pub segment_postings_options: SegmentPostingsOption,
}
@@ -35,7 +36,7 @@ impl TermWeight {
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
Ok(
reader
- .read_postings(&self.term, SegmentPostingsOption::Freq)
+ .read_postings(&self.term, self.segment_postings_options)
.map(|segment_postings|
TermScorer {
idf: self.idf(),