summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Masurel <paul.masurel@gmail.com>2016-11-02 12:54:06 +0900
committerPaul Masurel <paul.masurel@gmail.com>2016-11-02 17:58:20 +0900
commit6229a927308499e9af2a5ca96ca896cf327538d1 (patch)
treea33c8e3126159cc195a28a00cca95a8bca3cfa2d
parentc2c65d311d2e05570defe3860e82b28605680133 (diff)
issue/50 Removed SegmentPostingsTestFactory for just using VecPostings
-rw-r--r--src/common/mod.rs1
-rw-r--r--src/fastfield/reader.rs12
-rw-r--r--src/postings/docset.rs2
-rw-r--r--src/postings/mod.rs4
-rw-r--r--src/postings/postings.rs17
-rw-r--r--src/postings/segment_postings.rs14
-rw-r--r--src/postings/segment_postings_test_factory.rs87
-rw-r--r--src/query/boolean_query/boolean_query.rs11
-rw-r--r--src/query/boolean_query/boolean_scorer.rs51
-rw-r--r--src/query/boolean_query/boolean_weight.rs17
-rw-r--r--src/query/empty_scorer.rs22
-rw-r--r--src/query/mod.rs7
-rw-r--r--src/query/multi_term_query.rs9
-rw-r--r--src/query/occur.rs6
-rw-r--r--src/query/occur_filter.rs11
-rw-r--r--src/query/scorer.rs20
-rw-r--r--src/query/term_query/term_query.rs19
-rw-r--r--src/query/term_query/term_scorer.rs9
-rw-r--r--src/query/term_query/term_weight.rs44
-rw-r--r--src/query/weight.rs9
20 files changed, 153 insertions, 219 deletions
diff --git a/src/common/mod.rs b/src/common/mod.rs
index 77ff67e..d14e176 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -25,3 +25,4 @@ pub trait HasLen {
self.len() == 0
}
}
+
diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs
index 1804602..3357995 100644
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -13,6 +13,14 @@ use fastfield::FastFieldSerializer;
use fastfield::U32FastFieldsWriter;
use super::compute_num_bits;
+
+lazy_static! {
+ static ref U32_FAST_FIELD_EMPTY: ReadOnlySource = {
+ let u32_fast_field = U32FastFieldReader::from(Vec::new());
+ u32_fast_field._data.clone()
+ };
+}
+
pub struct U32FastFieldReader {
_data: ReadOnlySource,
data_ptr: *const u8,
@@ -24,6 +32,10 @@ pub struct U32FastFieldReader {
impl U32FastFieldReader {
+ pub fn empty() -> U32FastFieldReader {
+ U32FastFieldReader::open(U32_FAST_FIELD_EMPTY.clone()).expect("should always work.")
+ }
+
pub fn min_val(&self,) -> u32 {
self.min_val
}
diff --git a/src/postings/docset.rs b/src/postings/docset.rs
index db40db6..faf8393 100644
--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -61,7 +61,7 @@ pub trait DocSet {
}
-impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
+impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
fn advance(&mut self,) -> bool {
let unboxed: &mut TDocSet = self.borrow_mut();
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index e662885..0da67d2 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -17,7 +17,6 @@ mod offset_postings;
mod freq_handler;
mod docset;
mod segment_postings_option;
-mod segment_postings_test_factory;
pub use self::docset::{SkipResult, DocSet};
pub use self::offset_postings::OffsetPostings;
@@ -37,9 +36,6 @@ pub use self::intersection::intersection;
pub use self::intersection::IntersectionDocSet;
pub use self::freq_handler::FreqHandler;
-#[cfg(test)]
-pub use self::segment_postings_test_factory::SegmentPostingsTestFactory;
-
pub use self::segment_postings_option::SegmentPostingsOption;
pub use common::HasLen;
diff --git a/src/postings/postings.rs b/src/postings/postings.rs
index 8b964d0..ff80387 100644
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,8 +1,5 @@
use std::borrow::Borrow;
use postings::docset::DocSet;
-use common::HasLen;
-
-
/// Postings (also called inverted list)
///
@@ -51,17 +48,3 @@ impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
}
-
-impl<THasLen: HasLen> HasLen for Box<THasLen> {
- fn len(&self,) -> usize {
- let unboxed: &THasLen = self.borrow();
- unboxed.borrow().len()
- }
-}
-
-impl<'a> HasLen for &'a HasLen {
- fn len(&self,) -> usize {
- let unref: &HasLen = *self;
- unref.len()
- }
-}
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 19b2e2e..cac9b86 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -2,9 +2,9 @@ use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
use DocId;
use postings::{Postings, FreqHandler, DocSet, HasLen};
use std::num::Wrapping;
-use std::borrow::Cow;
+const EMPTY_DATA: [u8; 0] = [0u8; 0];
/// `SegmentPostings` represents the inverted list or postings associated to
/// a term in a `Segment`.
@@ -52,6 +52,18 @@ impl<'a> SegmentPostings<'a> {
}
}
+ /// Returns an empty segment postings object
+ pub fn empty() -> SegmentPostings<'static> {
+ SegmentPostings {
+ len: 0,
+ doc_offset: 0,
+ block_decoder: SIMDBlockDecoder::new(),
+ freq_handler: FreqHandler::new_without_freq(),
+ remaining_data: &EMPTY_DATA,
+ cur: Wrapping(usize::max_value()),
+ }
+ }
+
/// Index within a block is used as an address when
/// interacting with the `FreqHandler`
fn index_within_block(&self,) -> usize {
diff --git a/src/postings/segment_postings_test_factory.rs b/src/postings/segment_postings_test_factory.rs
deleted file mode 100644
index e1aefa4..0000000
--- a/src/postings/segment_postings_test_factory.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-use super::FreqHandler;
-use DocId;
-use std::mem;
-use std::path::{Path, PathBuf};
-use super::SegmentPostings;
-use super::serializer::PostingsSerializer;
-use schema::{SchemaBuilder, STRING};
-use directory::{RAMDirectory, Directory};
-use schema::Term;
-
-
-const EMPTY_POSITIONS: [DocId; 0] = [0u32; 0];
-
-pub struct SegmentPostingsTestFactory {
- directory: RAMDirectory,
- i: usize,
-}
-
-impl Default for SegmentPostingsTestFactory {
- fn default() -> SegmentPostingsTestFactory {
- SegmentPostingsTestFactory {
- directory: RAMDirectory::create(),
- i: 0
- }
- }
-}
-
-
-//data: Vec<u8>,
-//len: u32,
-
-impl SegmentPostingsTestFactory {
- pub fn from_data<'a>(&'a self, doc_ids: Vec<DocId>) -> SegmentPostings<'a> {
- let mut schema_builder = SchemaBuilder::default();
- let field = schema_builder.add_text_field("text", STRING);
- let schema = schema_builder.build();
-
- let postings_path = PathBuf::from(format!("postings{}", self.i));
- let terms_path = PathBuf::from(format!("terms{}", self.i));
- let positions_path = PathBuf::from(format!("positions{}", self.i));
- self.i += 1;
-
- let mut directory = self.directory.clone();
- let mut postings_serializer = PostingsSerializer::new(
- directory.open_write(&terms_path).unwrap(),
- directory.open_write(&postings_path).unwrap(),
- directory.open_write(&positions_path).unwrap(),
- schema
- ).unwrap();
- let term = Term::from_field_text(field, "dummy");
- postings_serializer.new_term(&term, doc_ids.len() as u32);
- for doc_id in &doc_ids {
- postings_serializer.write_doc(*doc_id, 1u32, &EMPTY_POSITIONS);
- }
- postings_serializer.close_term();
- postings_serializer.close();
- let postings_data = self.directory.open_read(&postings_path).unwrap();
- let ref_postings_data = unsafe {
- mem::transmute::<&[u8], &'a [u8]>(postings_data.as_slice())
- };
- SegmentPostings::from_data(doc_ids.len() as u32, ref_postings_data, FreqHandler::new_without_freq())
- }
-}
-
-
-#[cfg(test)]
-mod tests {
-
- use super::*;
- use postings::DocSet;
-
- #[test]
- pub fn test_segment_postings_tester() {
- let segment_postings_tester = SegmentPostingsTestFactory::default();
- let mut postings = segment_postings_tester.from_data(vec!(1,2,17,32));
- assert!(postings.advance());
- assert_eq!(postings.doc(), 1);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 2);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 17);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 32);
- assert!(!postings.advance());
- }
-
-}
diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs
index 2e9c24b..abecdb1 100644
--- a/src/query/boolean_query/boolean_query.rs
+++ b/src/query/boolean_query/boolean_query.rs
@@ -8,7 +8,16 @@ use query::Query;
use query::Occur;
use query::OccurFilter;
-
+/// The boolean query combines a set of queries
+///
+/// The documents matched by the boolean query are
+/// those which
+/// * match all of the sub queries associated with the
+/// `Must` occurrence
+/// * match none of the sub queries associated with the
+/// `MustNot` occurrence.
+/// * match at least one of the subqueries that is not
+/// a `MustNot` occurrence.
#[derive(Debug)]
pub struct BooleanQuery {
clauses: Vec<BooleanClause>,
diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs
index 0624876..7f7d2d2 100644
--- a/src/query/boolean_query/boolean_scorer.rs
+++ b/src/query/boolean_query/boolean_scorer.rs
@@ -1,6 +1,5 @@
use query::Scorer;
use DocId;
-use Score;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use postings::DocSet;
@@ -38,17 +37,13 @@ pub struct BooleanScorer<TScorer: Scorer> {
queue: BinaryHeap<HeapItem>,
doc: DocId,
score_combiner: ScoreCombiner,
- filter: OccurFilter,
+ occur_filter: OccurFilter,
}
impl<TScorer: Scorer> BooleanScorer<TScorer> {
- pub fn set_score_combiner(&mut self, score_combiner: ScoreCombiner) {
- self.score_combiner = score_combiner;
- }
-
pub fn new(postings: Vec<TScorer>,
- filter: OccurFilter) -> BooleanScorer<TScorer> {
+ occur_filter: OccurFilter) -> BooleanScorer<TScorer> {
let score_combiner = ScoreCombiner::default_for_num_scorers(postings.len());
let mut non_empty_postings: Vec<TScorer> = Vec::new();
for mut posting in postings {
@@ -73,7 +68,7 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
queue: BinaryHeap::from(heap_items),
doc: 0u32,
score_combiner: score_combiner,
- filter: filter,
+ occur_filter: occur_filter,
}
}
@@ -131,7 +126,7 @@ impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
}
self.advance_head();
}
- if self.filter.accept(ord_bitset) {
+ if self.occur_filter.accept(ord_bitset) {
return true;
}
}
@@ -160,33 +155,21 @@ mod tests {
use query::Scorer;
use query::OccurFilter;
use query::term_query::TermScorer;
- use directory::Directory;
- use directory::RAMDirectory;
- use schema::Field;
- use super::super::ScoreCombiner;
- use std::path::Path;
use query::Occur;
- use postings::SegmentPostingsTestFactory;
- use postings::Postings;
- use fastfield::{U32FastFieldReader, U32FastFieldWriter, FastFieldSerializer};
+ use fastfield::{U32FastFieldReader};
-
-
fn abs_diff(left: f32, right: f32) -> f32 {
(right - left).abs()
}
-
- lazy_static! {
- static ref segment_postings_test_factory: SegmentPostingsTestFactory = SegmentPostingsTestFactory::default();
- }
-
+
#[test]
pub fn test_boolean_scorer() {
let occurs = vec!(Occur::Should, Occur::Should);
let occur_filter = OccurFilter::new(&occurs);
let left_fieldnorms = U32FastFieldReader::from(vec!(100,200,300));
- let left = segment_postings_test_factory.from_data(vec!(1, 2, 3));
+
+ let left = VecPostings::from(vec!(1, 2, 3));
let left_scorer = TermScorer {
idf: 1f32,
fieldnorm_reader: left_fieldnorms,
@@ -194,22 +177,22 @@ mod tests {
};
let right_fieldnorms = U32FastFieldReader::from(vec!(15,25,35));
- let right = segment_postings_test_factory.from_data(vec!(1, 3, 8));
- let mut right_scorer = TermScorer {
+ let right = VecPostings::from(vec!(1, 3, 8));
+
+ let right_scorer = TermScorer {
idf: 4f32,
fieldnorm_reader: right_fieldnorms,
segment_postings: right,
};
- let score_combiner = ScoreCombiner::from(vec!(0f32, 1f32, 2f32));
+
let mut boolean_scorer = BooleanScorer::new(vec!(left_scorer, right_scorer), occur_filter);
- boolean_scorer.set_score_combiner(score_combiner);
assert_eq!(boolean_scorer.next(), Some(1u32));
- assert!(abs_diff(boolean_scorer.score(), 1.7414213) < 0.001);
+ assert!(abs_diff(boolean_scorer.score(), 0.8707107) < 0.001);
assert_eq!(boolean_scorer.next(), Some(2u32));
- assert!(abs_diff(boolean_scorer.score(), 0.057735026) < 0.001f32);
+ assert!(abs_diff(boolean_scorer.score(), 0.028867513) < 0.001f32);
assert_eq!(boolean_scorer.next(), Some(3u32));
assert_eq!(boolean_scorer.next(), Some(8u32));
- assert!(abs_diff(boolean_scorer.score(), 1.0327955) < 0.001f32);
+ assert!(abs_diff(boolean_scorer.score(), 0.5163978) < 0.001f32);
assert!(!boolean_scorer.advance());
}
@@ -219,9 +202,9 @@ mod tests {
let left_fieldnorms = U32FastFieldReader::from(vec!(10, 4));
assert_eq!(left_fieldnorms.get(0), 10);
assert_eq!(left_fieldnorms.get(1), 4);
- let left = segment_postings_test_factory.from_data(vec!(1));
+ let left = VecPostings::from(vec!(1));
let mut left_scorer = TermScorer {
- idf: 0.30685282, // 1f32,
+ idf: 0.30685282,
fieldnorm_reader: left_fieldnorms,
segment_postings: left,
};
diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs
index ee0c378..930b473 100644
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -1,6 +1,7 @@
use query::Weight;
use core::SegmentReader;
use query::Scorer;
+use super::BooleanScorer;
use query::OccurFilter;
use Result;
@@ -23,11 +24,13 @@ impl BooleanWeight {
impl Weight for BooleanWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- // BooleanScorer {
-
- // }
- panic!("");
-
+ let sub_scorers: Vec<Box<Scorer + 'a>> = try!(
+ self.weights
+ .iter()
+ .map(|weight| weight.scorer(reader))
+ .collect()
+ );
+ let boolean_scorer = BooleanScorer::new(sub_scorers, self.occur_filter);
+ Ok(box boolean_scorer)
}
-
-} \ No newline at end of file
+}
diff --git a/src/query/empty_scorer.rs b/src/query/empty_scorer.rs
deleted file mode 100644
index 0c1e989..0000000
--- a/src/query/empty_scorer.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-use query::Scorer;
-use DocSet;
-use Score;
-use DocId;
-
-pub struct EmptyScorer;
-
-impl Scorer for EmptyScorer {
- fn score(&self) -> Score {
- 0f32
- }
-}
-
-impl DocSet for EmptyScorer {
- fn advance(&mut self) -> bool {
- false
- }
-
- fn doc(&self) -> DocId {
- 0
- }
-}
diff --git a/src/query/mod.rs b/src/query/mod.rs
index 788aa67..a74d71f 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -16,13 +16,8 @@ mod similarity;
mod weight;
mod occur_filter;
mod term_query;
-mod empty_scorer;
-
-
-pub use self::empty_scorer::EmptyScorer;
pub use self::occur_filter::OccurFilter;
-
pub use self::similarity::Similarity;
pub use self::boolean_query::BooleanQuery;
pub use self::occur::Occur;
@@ -35,4 +30,4 @@ pub use self::query_parser::QueryParser;
pub use self::explanation::Explanation;
pub use self::multi_term_accumulator::MultiTermAccumulator;
pub use self::query_parser::ParsingError;
-pub use self::weight::Weight; \ No newline at end of file
+pub use self::weight::Weight;
diff --git a/src/query/multi_term_query.rs b/src/query/multi_term_query.rs
index 5114899..4213567 100644
--- a/src/query/multi_term_query.rs
+++ b/src/query/multi_term_query.rs
@@ -11,7 +11,6 @@ use query::occur_filter::OccurFilter;
use query::term_query::{TermQuery, TermWeight, TermScorer};
use query::boolean_query::BooleanScorer;
-
struct MultiTermWeight {
weights: Vec<TermWeight>,
occur_filter: OccurFilter,
@@ -21,12 +20,10 @@ struct MultiTermWeight {
impl Weight for MultiTermWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- let mut term_scorers: Vec<TermScorer<'a>> = Vec::new();
+ let mut term_scorers: Vec<TermScorer<_>> = Vec::new();
for term_weight in &self.weights {
- let term_scorer_option = try!(term_weight.specialized_scorer(reader));
- if let Some(term_scorer) = term_scorer_option {
- term_scorers.push(term_scorer);
- }
+ let term_scorer = try!(term_weight.specialized_scorer(reader));
+ term_scorers.push(term_scorer);
}
Ok(box BooleanScorer::new(term_scorers, self.occur_filter.clone()))
}
diff --git a/src/query/occur.rs b/src/query/occur.rs
index 86bade9..1f42b4c 100644
--- a/src/query/occur.rs
+++ b/src/query/occur.rs
@@ -2,9 +2,9 @@
/// should be present or must not be present.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Occur {
- /// The term should be present in the document.
- /// Document without the term will be considered
- /// in scoring as well.
+ /// For a given document to be considered for scoring,
+ /// at least one of the subqueries with a `Should` or a `Must`
+ /// occur constraint must match the document.
Should,
/// Document without the term are excluded from the search.
Must,
diff --git a/src/query/occur_filter.rs b/src/query/occur_filter.rs
index 188fc63..53280fa 100644
--- a/src/query/occur_filter.rs
+++ b/src/query/occur_filter.rs
@@ -1,16 +1,25 @@
use query::Occur;
-#[derive(Clone)]
+
+/// An OccurFilter represents a filter over a bitset of
/// at most 64 elements.
+///
+/// It wraps some simple bitmask to compute the filter
+/// rapidly.
+#[derive(Clone, Copy)]
pub struct OccurFilter {
and_mask: u64,
result: u64,
}
impl OccurFilter {
+
+ /// Returns true if the bitset matches the occur list.
pub fn accept(&self, ord_set: u64) -> bool {
(self.and_mask & ord_set) == self.result
}
+ /// Builds an `OccurFilter` from a list of `Occur`.
pub fn new(occurs: &[Occur]) -> OccurFilter {
let mut and_mask = 0u64;
let mut result = 0u64;
diff --git a/src/query/scorer.rs b/src/query/scorer.rs
index 32ed4b4..7513705 100644
--- a/src/query/scorer.rs
+++ b/src/query/scorer.rs
@@ -1,7 +1,10 @@
use DocSet;
use collector::Collector;
+use std::ops::{Deref, DerefMut};
-/// Scored `DocSet`
+/// Scored set of documents matching a query within a specific segment.
+///
+/// See [Query](./trait.Query.html).
pub trait Scorer: DocSet {
/// Returns the score.
@@ -9,6 +12,8 @@ pub trait Scorer: DocSet {
/// This method will perform a bit of computation and is not cached.
fn score(&self,) -> f32;
+ /// Consumes the complete `DocSet` and
+ /// push the scored documents to the collector.
fn collect(&mut self, collector: &mut Collector) {
while self.advance() {
collector.collect(self.doc(), self.score());
@@ -16,3 +21,16 @@ pub trait Scorer: DocSet {
}
}
+
+impl<'a> Scorer for Box<Scorer + 'a> {
+ fn score(&self,) -> f32 {
+ self.deref().score()
+ }
+
+ fn collect(&mut self, collector: &mut Collector) {
+ let scorer = self.deref_mut();
+ while scorer.advance() {
+ collector.collect(scorer.doc(), scorer.score());
+ }
+ }
+} \ No newline at end of file
diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs
index 6049d84..11536ac 100644
--- a/src/query/term_query/term_query.rs
+++ b/src/query/term_query/term_query.rs
@@ -6,16 +6,31 @@ use query::Weight;
use Searcher;
use std::any::Any;
+/// A Term query matches all of the documents
+/// containing a specific term.
+///
+/// The score associated is defined as
+/// `idf` * sqrt(`term_freq` / `field norm`)
+/// in which :
+/// * idf - inverse document frequency.
+/// * term_freq - number of occurrences of the term in the field
+/// * field norm - number of tokens in the field.
#[derive(Debug)]
pub struct TermQuery {
term: Term,
}
impl TermQuery {
+
+ /// Returns a weight object.
+ ///
+ /// While `.weight(...)` returns a boxed trait object,
+ /// this method returns a specific implementation.
+ /// This is useful for optimization purposes.
pub fn specialized_weight(&self, searcher: &Searcher) -> TermWeight {
- let doc_freq = searcher.doc_freq(&self.term);
TermWeight {
- doc_freq: doc_freq,
+ num_docs: searcher.num_docs(),
+ doc_freq: searcher.doc_freq(&self.term),
term: self.term.clone()
}
}
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index e7c3bf6..db1f484 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -1,18 +1,17 @@
use Score;
use DocId;
-use postings::SegmentPostings;
use fastfield::U32FastFieldReader;
use postings::DocSet;
use query::Scorer;
use postings::Postings;
-pub struct TermScorer<'a> {
+pub struct TermScorer<TPostings> where TPostings: Postings {
pub idf: Score,
pub fieldnorm_reader: U32FastFieldReader,
- pub segment_postings: SegmentPostings<'a>,
+ pub segment_postings: TPostings,
}
-impl<'a> DocSet for TermScorer<'a> {
+impl<TPostings> DocSet for TermScorer<TPostings> where TPostings: Postings {
fn advance(&mut self,) -> bool {
self.segment_postings.advance()
@@ -23,7 +22,7 @@ impl<'a> DocSet for TermScorer<'a> {
}
}
-impl<'a> Scorer for TermScorer<'a> {
+impl<TPostings> Scorer for TermScorer<TPostings> where TPostings: Postings {
fn score(&self,) -> Score {
let doc = self.segment_postings.doc();
let field_norm = self.fieldnorm_reader.get(doc);
diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs
index acd5ffe..4f99904 100644
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -2,12 +2,14 @@ use Term;
use query::Weight;
use core::SegmentReader;
use query::Scorer;
-use query::EmptyScorer;
use postings::SegmentPostingsOption;
+use postings::SegmentPostings;
+use fastfield::U32FastFieldReader;
use super::term_scorer::TermScorer;
use Result;
pub struct TermWeight {
+ pub num_docs: u32,
pub doc_freq: u32,
pub term: Term,
}
@@ -16,33 +18,37 @@ pub struct TermWeight {
impl Weight for TermWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- let specialized_scorer_option = try!(self.specialized_scorer(reader));
- match specialized_scorer_option {
- Some(term_scorer) => {
- Ok(box term_scorer)
- }
- None => {
- Ok(box EmptyScorer)
- }
- }
+ let specialized_scorer = try!(self.specialized_scorer(reader));
+ Ok(box specialized_scorer)
}
}
impl TermWeight {
- pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Option<TermScorer<'a>>> {
+ fn idf(&self) -> f32 {
+ 1.0 + (self.num_docs as f32 / (self.doc_freq as f32 + 1.0)).ln()
+ }
+
+ pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<TermScorer<SegmentPostings<'a>>> {
let field = self.term.field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
Ok(
- reader.read_postings(&self.term, SegmentPostingsOption::Freq)
- .map(|segment_postings|
- TermScorer {
- idf: 1f32 / (self.doc_freq as f32),
- fieldnorm_reader: fieldnorm_reader,
- segment_postings: segment_postings,
- }
- )
+ reader
+ .read_postings(&self.term, SegmentPostingsOption::Freq)
+ .map(|segment_postings|
+ TermScorer {
+ idf: self.idf(),
+ fieldnorm_reader: fieldnorm_reader,
+ segment_postings: segment_postings,
+ }
+ )
+ .unwrap_or(
+ TermScorer {
+ idf: 1f32,
+ fieldnorm_reader: U32FastFieldReader::empty(),
+ segment_postings: SegmentPostings::empty()
+ })
)
}
diff --git a/src/query/weight.rs b/src/query/weight.rs
index 27a7afd..db583a3 100644
--- a/src/query/weight.rs
+++ b/src/query/weight.rs
@@ -2,10 +2,15 @@ use super::Scorer;
use Result;
use core::SegmentReader;
+
+/// A Weight is the specialization of a Query
+/// for a given set of segments.
+///
+/// See [Query](./trait.Query.html).
pub trait Weight {
-
+ /// Returns the scorer for the given segment.
+ /// See [Query](./trait.Query.html).
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>>;
-
}