summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Masurel <paul.masurel@gmail.com>2016-11-02 12:54:06 +0900
committerPaul Masurel <paul.masurel@gmail.com>2016-11-02 17:58:20 +0900
commit6229a927308499e9af2a5ca96ca896cf327538d1 (patch)
treea33c8e3126159cc195a28a00cca95a8bca3cfa2d
parentc2c65d311d2e05570defe3860e82b28605680133 (diff)
issue/50 Removed SegmentPostingsTestFactory for just using VecPostings
-rw-r--r--src/common/mod.rs1
-rw-r--r--src/fastfield/reader.rs12
-rw-r--r--src/postings/docset.rs2
-rw-r--r--src/postings/mod.rs4
-rw-r--r--src/postings/postings.rs17
-rw-r--r--src/postings/segment_postings.rs14
-rw-r--r--src/postings/segment_postings_test_factory.rs87
-rw-r--r--src/query/boolean_query/boolean_query.rs11
-rw-r--r--src/query/boolean_query/boolean_scorer.rs51
-rw-r--r--src/query/boolean_query/boolean_weight.rs17
-rw-r--r--src/query/empty_scorer.rs22
-rw-r--r--src/query/mod.rs7
-rw-r--r--src/query/multi_term_query.rs9
-rw-r--r--src/query/occur.rs6
-rw-r--r--src/query/occur_filter.rs11
-rw-r--r--src/query/scorer.rs20
-rw-r--r--src/query/term_query/term_query.rs19
-rw-r--r--src/query/term_query/term_scorer.rs9
-rw-r--r--src/query/term_query/term_weight.rs44
-rw-r--r--src/query/weight.rs9
20 files changed, 153 insertions, 219 deletions
diff --git a/src/common/mod.rs b/src/common/mod.rs
index 77ff67e..d14e176 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -25,3 +25,4 @@ pub trait HasLen {
self.len() == 0
}
}
+
diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs
index 1804602..3357995 100644
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -13,6 +13,14 @@ use fastfield::FastFieldSerializer;
use fastfield::U32FastFieldsWriter;
use super::compute_num_bits;
+
+lazy_static! {
+ static ref U32_FAST_FIELD_EMPTY: ReadOnlySource = {
+ let u32_fast_field = U32FastFieldReader::from(Vec::new());
+ u32_fast_field._data.clone()
+ };
+}
+
pub struct U32FastFieldReader {
_data: ReadOnlySource,
data_ptr: *const u8,
@@ -24,6 +32,10 @@ pub struct U32FastFieldReader {
impl U32FastFieldReader {
+ pub fn empty() -> U32FastFieldReader {
+ U32FastFieldReader::open(U32_FAST_FIELD_EMPTY.clone()).expect("should always work.")
+ }
+
pub fn min_val(&self,) -> u32 {
self.min_val
}
diff --git a/src/postings/docset.rs b/src/postings/docset.rs
index db40db6..faf8393 100644
--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -61,7 +61,7 @@ pub trait DocSet {
}
-impl<TDocSet: DocSet> DocSet for Box<TDocSet> {
+impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
fn advance(&mut self,) -> bool {
let unboxed: &mut TDocSet = self.borrow_mut();
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index e662885..0da67d2 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -17,7 +17,6 @@ mod offset_postings;
mod freq_handler;
mod docset;
mod segment_postings_option;
-mod segment_postings_test_factory;
pub use self::docset::{SkipResult, DocSet};
pub use self::offset_postings::OffsetPostings;
@@ -37,9 +36,6 @@ pub use self::intersection::intersection;
pub use self::intersection::IntersectionDocSet;
pub use self::freq_handler::FreqHandler;
-#[cfg(test)]
-pub use self::segment_postings_test_factory::SegmentPostingsTestFactory;
-
pub use self::segment_postings_option::SegmentPostingsOption;
pub use common::HasLen;
diff --git a/src/postings/postings.rs b/src/postings/postings.rs
index 8b964d0..ff80387 100644
--- a/src/postings/postings.rs
+++ b/src/postings/postings.rs
@@ -1,8 +1,5 @@
use std::borrow::Borrow;
use postings::docset::DocSet;
-use common::HasLen;
-
-
/// Postings (also called inverted list)
///
@@ -51,17 +48,3 @@ impl<'a, TPostings: Postings> Postings for &'a mut TPostings {
}
-
-impl<THasLen: HasLen> HasLen for Box<THasLen> {
- fn len(&self,) -> usize {
- let unboxed: &THasLen = self.borrow();
- unboxed.borrow().len()
- }
-}
-
-impl<'a> HasLen for &'a HasLen {
- fn len(&self,) -> usize {
- let unref: &HasLen = *self;
- unref.len()
- }
-}
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index 19b2e2e..cac9b86 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -2,9 +2,9 @@ use compression::{NUM_DOCS_PER_BLOCK, SIMDBlockDecoder};
use DocId;
use postings::{Postings, FreqHandler, DocSet, HasLen};
use std::num::Wrapping;
-use std::borrow::Cow;
+const EMPTY_DATA: [u8; 0] = [0u8; 0];
/// `SegmentPostings` represents the inverted list or postings associated to
/// a term in a `Segment`.
@@ -52,6 +52,18 @@ impl<'a> SegmentPostings<'a> {
}
}
+ /// Returns an empty segment postings object
+ pub fn empty() -> SegmentPostings<'static> {
+ SegmentPostings {
+ len: 0,
+ doc_offset: 0,
+ block_decoder: SIMDBlockDecoder::new(),
+ freq_handler: FreqHandler::new_without_freq(),
+ remaining_data: &EMPTY_DATA,
+ cur: Wrapping(usize::max_value()),
+ }
+ }
+
/// Index within a block is used as an address when
/// interacting with the `FreqHandler`
fn index_within_block(&self,) -> usize {
diff --git a/src/postings/segment_postings_test_factory.rs b/src/postings/segment_postings_test_factory.rs
deleted file mode 100644
index e1aefa4..0000000
--- a/src/postings/segment_postings_test_factory.rs
+++ /dev/null
@@ -1,87 +0,0 @@
-use super::FreqHandler;
-use DocId;
-use std::mem;
-use std::path::{Path, PathBuf};
-use super::SegmentPostings;
-use super::serializer::PostingsSerializer;
-use schema::{SchemaBuilder, STRING};
-use directory::{RAMDirectory, Directory};
-use schema::Term;
-
-
-const EMPTY_POSITIONS: [DocId; 0] = [0u32; 0];
-
-pub struct SegmentPostingsTestFactory {
- directory: RAMDirectory,
- i: usize,
-}
-
-impl Default for SegmentPostingsTestFactory {
- fn default() -> SegmentPostingsTestFactory {
- SegmentPostingsTestFactory {
- directory: RAMDirectory::create(),
- i: 0
- }
- }
-}
-
-
-//data: Vec<u8>,
-//len: u32,
-
-impl SegmentPostingsTestFactory {
- pub fn from_data<'a>(&'a self, doc_ids: Vec<DocId>) -> SegmentPostings<'a> {
- let mut schema_builder = SchemaBuilder::default();
- let field = schema_builder.add_text_field("text", STRING);
- let schema = schema_builder.build();
-
- let postings_path = PathBuf::from(format!("postings{}", self.i));
- let terms_path = PathBuf::from(format!("terms{}", self.i));
- let positions_path = PathBuf::from(format!("positions{}", self.i));
- self.i += 1;
-
- let mut directory = self.directory.clone();
- let mut postings_serializer = PostingsSerializer::new(
- directory.open_write(&terms_path).unwrap(),
- directory.open_write(&postings_path).unwrap(),
- directory.open_write(&positions_path).unwrap(),
- schema
- ).unwrap();
- let term = Term::from_field_text(field, "dummy");
- postings_serializer.new_term(&term, doc_ids.len() as u32);
- for doc_id in &doc_ids {
- postings_serializer.write_doc(*doc_id, 1u32, &EMPTY_POSITIONS);
- }
- postings_serializer.close_term();
- postings_serializer.close();
- let postings_data = self.directory.open_read(&postings_path).unwrap();
- let ref_postings_data = unsafe {
- mem::transmute::<&[u8], &'a [u8]>(postings_data.as_slice())
- };
- SegmentPostings::from_data(doc_ids.len() as u32, ref_postings_data, FreqHandler::new_without_freq())
- }
-}
-
-
-#[cfg(test)]
-mod tests {
-
- use super::*;
- use postings::DocSet;
-
- #[test]
- pub fn test_segment_postings_tester() {
- let segment_postings_tester = SegmentPostingsTestFactory::default();
- let mut postings = segment_postings_tester.from_data(vec!(1,2,17,32));
- assert!(postings.advance());
- assert_eq!(postings.doc(), 1);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 2);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 17);
- assert!(postings.advance());
- assert_eq!(postings.doc(), 32);
- assert!(!postings.advance());
- }
-
-}
diff --git a/src/query/boolean_query/boolean_query.rs b/src/query/boolean_query/boolean_query.rs
index 2e9c24b..abecdb1 100644
--- a/src/query/boolean_query/boolean_query.rs
+++ b/src/query/boolean_query/boolean_query.rs
@@ -8,7 +8,16 @@ use query::Query;
use query::Occur;
use query::OccurFilter;
-
+/// The boolean query combines a set of queries
+///
+/// The documents matched by the boolean query are
+/// those which
+/// * match all of the sub queries associated with the
+/// `Must` occurrence
+/// * match none of the sub queries associated with the
+/// `MustNot` occurrence.
+/// * match at least one of the subqueries that is not
+/// a `MustNot` occurrence.
#[derive(Debug)]
pub struct BooleanQuery {
clauses: Vec<BooleanClause>,
diff --git a/src/query/boolean_query/boolean_scorer.rs b/src/query/boolean_query/boolean_scorer.rs
index 0624876..7f7d2d2 100644
--- a/src/query/boolean_query/boolean_scorer.rs
+++ b/src/query/boolean_query/boolean_scorer.rs
@@ -1,6 +1,5 @@
use query::Scorer;
use DocId;
-use Score;
use std::collections::BinaryHeap;
use std::cmp::Ordering;
use postings::DocSet;
@@ -38,17 +37,13 @@ pub struct BooleanScorer<TScorer: Scorer> {
queue: BinaryHeap<HeapItem>,
doc: DocId,
score_combiner: ScoreCombiner,
- filter: OccurFilter,
+ occur_filter: OccurFilter,
}
impl<TScorer: Scorer> BooleanScorer<TScorer> {
- pub fn set_score_combiner(&mut self, score_combiner: ScoreCombiner) {
- self.score_combiner = score_combiner;
- }
-
pub fn new(postings: Vec<TScorer>,
- filter: OccurFilter) -> BooleanScorer<TScorer> {
+ occur_filter: OccurFilter) -> BooleanScorer<TScorer> {
let score_combiner = ScoreCombiner::default_for_num_scorers(postings.len());
let mut non_empty_postings: Vec<TScorer> = Vec::new();
for mut posting in postings {
@@ -73,7 +68,7 @@ impl<TScorer: Scorer> BooleanScorer<TScorer> {
queue: BinaryHeap::from(heap_items),
doc: 0u32,
score_combiner: score_combiner,
- filter: filter,
+ occur_filter: occur_filter,
}
}
@@ -131,7 +126,7 @@ impl<TScorer: Scorer> DocSet for BooleanScorer<TScorer> {
}
self.advance_head();
}
- if self.filter.accept(ord_bitset) {
+ if self.occur_filter.accept(ord_bitset) {
return true;
}
}
@@ -160,33 +155,21 @@ mod tests {
use query::Scorer;
use query::OccurFilter;
use query::term_query::TermScorer;
- use directory::Directory;
- use directory::RAMDirectory;
- use schema::Field;
- use super::super::ScoreCombiner;
- use std::path::Path;
use query::Occur;
- use postings::SegmentPostingsTestFactory;
- use postings::Postings;
- use fastfield::{U32FastFieldReader, U32FastFieldWriter, FastFieldSerializer};
+ use fastfield::{U32FastFieldReader};
-
-
fn abs_diff(left: f32, right: f32) -> f32 {
(right - left).abs()
}
-
- lazy_static! {
- static ref segment_postings_test_factory: SegmentPostingsTestFactory = SegmentPostingsTestFactory::default();
- }
-
+
#[test]
pub fn test_boolean_scorer() {
let occurs = vec!(Occur::Should, Occur::Should);
let occur_filter = OccurFilter::new(&occurs);
let left_fieldnorms = U32FastFieldReader::from(vec!(100,200,300));
- let left = segment_postings_test_factory.from_data(vec!(1, 2, 3));
+
+ let left = VecPostings::from(vec!(1, 2, 3));
let left_scorer = TermScorer {
idf: 1f32,
fieldnorm_reader: left_fieldnorms,
@@ -194,22 +177,22 @@ mod tests {
};
let right_fieldnorms = U32FastFieldReader::from(vec!(15,25,35));
- let right = segment_postings_test_factory.from_data(vec!(1, 3, 8));
- let mut right_scorer = TermScorer {
+ let right = VecPostings::from(vec!(1, 3, 8));
+
+ let right_scorer = TermScorer {
idf: 4f32,
fieldnorm_reader: right_fieldnorms,
segment_postings: right,
};
- let score_combiner = ScoreCombiner::from(vec!(0f32, 1f32, 2f32));
+
let mut boolean_scorer = BooleanScorer::new(vec!(left_scorer, right_scorer), occur_filter);
- boolean_scorer.set_score_combiner(score_combiner);
assert_eq!(boolean_scorer.next(), Some(1u32));
- assert!(abs_diff(boolean_scorer.score(), 1.7414213) < 0.001);
+ assert!(abs_diff(boolean_scorer.score(), 0.8707107) < 0.001);
assert_eq!(boolean_scorer.next(), Some(2u32));
- assert!(abs_diff(boolean_scorer.score(), 0.057735026) < 0.001f32);
+ assert!(abs_diff(boolean_scorer.score(), 0.028867513) < 0.001f32);
assert_eq!(boolean_scorer.next(), Some(3u32));
assert_eq!(boolean_scorer.next(), Some(8u32));
- assert!(abs_diff(boolean_scorer.score(), 1.0327955) < 0.001f32);
+ assert!(abs_diff(boolean_scorer.score(), 0.5163978) < 0.001f32);
assert!(!boolean_scorer.advance());
}
@@ -219,9 +202,9 @@ mod tests {
let left_fieldnorms = U32FastFieldReader::from(vec!(10, 4));
assert_eq!(left_fieldnorms.get(0), 10);
assert_eq!(left_fieldnorms.get(1), 4);
- let left = segment_postings_test_factory.from_data(vec!(1));
+ let left = VecPostings::from(vec!(1));
let mut left_scorer = TermScorer {
- idf: 0.30685282, // 1f32,
+ idf: 0.30685282,
fieldnorm_reader: left_fieldnorms,
segment_postings: left,
};
diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs
index ee0c378..930b473 100644
--- a/src/query/boolean_query/boolean_weight.rs
+++ b/src/query/boolean_query/boolean_weight.rs
@@ -1,6 +1,7 @@
use query::Weight;
use core::SegmentReader;
use query::Scorer;
+use super::BooleanScorer;
use query::OccurFilter;
use Result;
@@ -23,11 +24,13 @@ impl BooleanWeight {
impl Weight for BooleanWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- // BooleanScorer {
-
- // }
- panic!("");
-
+ let sub_scorers: Vec<Box<Scorer + 'a>> = try!(
+ self.weights
+ .iter()
+ .map(|weight| weight.scorer(reader))
+ .collect()
+ );
+ let boolean_scorer = BooleanScorer::new(sub_scorers, self.occur_filter);
+ Ok(box boolean_scorer)
}
-
-} \ No newline at end of file
+}
diff --git a/src/query/empty_scorer.rs b/src/query/empty_scorer.rs
deleted file mode 100644
index 0c1e989..0000000
--- a/src/query/empty_scorer.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-use query::Scorer;
-use DocSet;
-use Score;
-use DocId;
-
-pub struct EmptyScorer;
-
-impl Scorer for EmptyScorer {
- fn score(&self) -> Score {
- 0f32
- }
-}
-
-impl DocSet for EmptyScorer {
- fn advance(&mut self) -> bool {
- false
- }
-
- fn doc(&self) -> DocId {
- 0
- }
-}
diff --git a/src/query/mod.rs b/src/query/mod.rs
index 788aa67..a74d71f 100644
--- a/src/query/mod.rs
+++ b/src/query/mod.rs
@@ -16,13 +16,8 @@ mod similarity;
mod weight;
mod occur_filter;
mod term_query;
-mod empty_scorer;
-
-
-pub use self::empty_scorer::EmptyScorer;
pub use self::occur_filter::OccurFilter;
-
pub use self::similarity::Similarity;
pub use self::boolean_query::BooleanQuery;
pub use self::occur::Occur;
@@ -35,4 +30,4 @@ pub use self::query_parser::QueryParser;
pub use self::explanation::Explanation;
pub use self::multi_term_accumulator::MultiTermAccumulator;
pub use self::query_parser::ParsingError;
-pub use self::weight::Weight; \ No newline at end of file
+pub use self::weight::Weight;
diff --git a/src/query/multi_term_query.rs b/src/query/multi_term_query.rs
index 5114899..4213567 100644
--- a/src/query/multi_term_query.rs
+++ b/src/query/multi_term_query.rs
@@ -11,7 +11,6 @@ use query::occur_filter::OccurFilter;
use query::term_query::{TermQuery, TermWeight, TermScorer};
use query::boolean_query::BooleanScorer;
-
struct MultiTermWeight {
weights: Vec<TermWeight>,
occur_filter: OccurFilter,
@@ -21,12 +20,10 @@ struct MultiTermWeight {
impl Weight for MultiTermWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- let mut term_scorers: Vec<TermScorer<'a>> = Vec::new();
+ let mut term_scorers: Vec<TermScorer<_>> = Vec::new();
for term_weight in &self.weights {
- let term_scorer_option = try!(term_weight.specialized_scorer(reader));
- if let Some(term_scorer) = term_scorer_option {
- term_scorers.push(term_scorer);
- }
+ let term_scorer = try!(term_weight.specialized_scorer(reader));
+ term_scorers.push(term_scorer);
}
Ok(box BooleanScorer::new(term_scorers, self.occur_filter.clone()))
}
diff --git a/src/query/occur.rs b/src/query/occur.rs
index 86bade9..1f42b4c 100644
--- a/src/query/occur.rs
+++ b/src/query/occur.rs
@@ -2,9 +2,9 @@
/// should be present or must not be present.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum Occur {
- /// The term should be present in the document.
- /// Document without the term will be considered
- /// in scoring as well.
+ /// For a given document to be considered for scoring,
+ /// at least one of the subqueries with a `Should` or a `Must`
+ /// occur constraint must match the document.
Should,
/// Document without the term are excluded from the search.
Must,
diff --git a/src/query/occur_filter.rs b/src/query/occur_filter.rs
index 188fc63..53280fa 100644
--- a/src/query/occur_filter.rs
+++ b/src/query/occur_filter.rs
@@ -1,16 +1,25 @@
use query::Occur;
-#[derive(Clone)]
+
+/// An OccurFilter represents a filter over a bitset of
/// at most 64 elements.
+///
+/// It wraps some simple bitmask to compute the filter
+/// rapidly.
+#[derive(Clone, Copy)]
pub struct OccurFilter {
and_mask: u64,
result: u64,
}
impl OccurFilter {
+
+ /// Returns true if the bitset matches the occur list.
pub fn accept(&self, ord_set: u64) -> bool {
(self.and_mask & ord_set) == self.result
}
+ /// Builds an `OccurFilter` from a list of `Occur`.
pub fn new(occurs: &[Occur]) -> OccurFilter {
let mut and_mask = 0u64;
let mut result = 0u64;
diff --git a/src/query/scorer.rs b/src/query/scorer.rs
index 32ed4b4..7513705 100644
--- a/src/query/scorer.rs
+++ b/src/query/scorer.rs
@@ -1,7 +1,10 @@
use DocSet;
use collector::Collector;
+use std::ops::{Deref, DerefMut};
-/// Scored `DocSet`
+/// Scored set of documents matching a query within a specific segment.
+///
+/// See [Query](./trait.Query.html).
pub trait Scorer: DocSet {
/// Returns the score.
@@ -9,6 +12,8 @@ pub trait Scorer: DocSet {
/// This method will perform a bit of computation and is not cached.
fn score(&self,) -> f32;
+ /// Consumes the complete `DocSet` and
+ /// push the scored documents to the collector.
fn collect(&mut self, collector: &mut Collector) {
while self.advance() {
collector.collect(self.doc(), self.score());
@@ -16,3 +21,16 @@ pub trait Scorer: DocSet {
}
}
+
+impl<'a> Scorer for Box<Scorer + 'a> {
+ fn score(&self,) -> f32 {
+ self.deref().score()
+ }
+
+ fn collect(&mut self, collector: &mut Collector) {
+ let scorer = self.deref_mut();
+ while scorer.advance() {
+ collector.collect(scorer.doc(), scorer.score());
+ }
+ }
+} \ No newline at end of file
diff --git a/src/query/term_query/term_query.rs b/src/query/term_query/term_query.rs
index 6049d84..11536ac 100644
--- a/src/query/term_query/term_query.rs
+++ b/src/query/term_query/term_query.rs
@@ -6,16 +6,31 @@ use query::Weight;
use Searcher;
use std::any::Any;
+/// A Term query matches all of the documents
+/// containing a specific term.
+///
+/// The score associated is defined as
+/// `idf` * sqrt(`term_freq` / `field norm`)
+/// in which :
+/// * idf - inverse document frequency.
+/// * term_freq - number of occurrences of the term in the field
+/// * field norm - number of tokens in the field.
#[derive(Debug)]
pub struct TermQuery {
term: Term,
}
impl TermQuery {
+
+ /// Returns a weight object.
+ ///
+ /// While `.weight(...)` returns a boxed trait object,
+ /// this method returns a specific implementation.
+ /// This is useful for optimization purposes.
pub fn specialized_weight(&self, searcher: &Searcher) -> TermWeight {
- let doc_freq = searcher.doc_freq(&self.term);
TermWeight {
- doc_freq: doc_freq,
+ num_docs: searcher.num_docs(),
+ doc_freq: searcher.doc_freq(&self.term),
term: self.term.clone()
}
}
diff --git a/src/query/term_query/term_scorer.rs b/src/query/term_query/term_scorer.rs
index e7c3bf6..db1f484 100644
--- a/src/query/term_query/term_scorer.rs
+++ b/src/query/term_query/term_scorer.rs
@@ -1,18 +1,17 @@
use Score;
use DocId;
-use postings::SegmentPostings;
use fastfield::U32FastFieldReader;
use postings::DocSet;
use query::Scorer;
use postings::Postings;
-pub struct TermScorer<'a> {
+pub struct TermScorer<TPostings> where TPostings: Postings {
pub idf: Score,
pub fieldnorm_reader: U32FastFieldReader,
- pub segment_postings: SegmentPostings<'a>,
+ pub segment_postings: TPostings,
}
-impl<'a> DocSet for TermScorer<'a> {
+impl<TPostings> DocSet for TermScorer<TPostings> where TPostings: Postings {
fn advance(&mut self,) -> bool {
self.segment_postings.advance()
@@ -23,7 +22,7 @@ impl<'a> DocSet for TermScorer<'a> {
}
}
-impl<'a> Scorer for TermScorer<'a> {
+impl<TPostings> Scorer for TermScorer<TPostings> where TPostings: Postings {
fn score(&self,) -> Score {
let doc = self.segment_postings.doc();
let field_norm = self.fieldnorm_reader.get(doc);
diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs
index acd5ffe..4f99904 100644
--- a/src/query/term_query/term_weight.rs
+++ b/src/query/term_query/term_weight.rs
@@ -2,12 +2,14 @@ use Term;
use query::Weight;
use core::SegmentReader;
use query::Scorer;
-use query::EmptyScorer;
use postings::SegmentPostingsOption;
+use postings::SegmentPostings;
+use fastfield::U32FastFieldReader;
use super::term_scorer::TermScorer;
use Result;
pub struct TermWeight {
+ pub num_docs: u32,
pub doc_freq: u32,
pub term: Term,
}
@@ -16,33 +18,37 @@ pub struct TermWeight {
impl Weight for TermWeight {
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>> {
- let specialized_scorer_option = try!(self.specialized_scorer(reader));
- match specialized_scorer_option {
- Some(term_scorer) => {
- Ok(box term_scorer)
- }
- None => {
- Ok(box EmptyScorer)
- }
- }
+ let specialized_scorer = try!(self.specialized_scorer(reader));
+ Ok(box specialized_scorer)
}
}
impl TermWeight {
- pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Option<TermScorer<'a>>> {
+ fn idf(&self) -> f32 {
+ 1.0 + (self.num_docs as f32 / (self.doc_freq as f32 + 1.0)).ln()
+ }
+
+ pub fn specialized_scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<TermScorer<SegmentPostings<'a>>> {
let field = self.term.field();
let fieldnorm_reader = try!(reader.get_fieldnorms_reader(field));
Ok(
- reader.read_postings(&self.term, SegmentPostingsOption::Freq)
- .map(|segment_postings|
- TermScorer {
- idf: 1f32 / (self.doc_freq as f32),
- fieldnorm_reader: fieldnorm_reader,
- segment_postings: segment_postings,
- }
- )
+ reader
+ .read_postings(&self.term, SegmentPostingsOption::Freq)
+ .map(|segment_postings|
+ TermScorer {
+ idf: self.idf(),
+ fieldnorm_reader: fieldnorm_reader,
+ segment_postings: segment_postings,
+ }
+ )
+ .unwrap_or(
+ TermScorer {
+ idf: 1f32,
+ fieldnorm_reader: U32FastFieldReader::empty(),
+ segment_postings: SegmentPostings::empty()
+ })
)
}
diff --git a/src/query/weight.rs b/src/query/weight.rs
index 27a7afd..db583a3 100644
--- a/src/query/weight.rs
+++ b/src/query/weight.rs
@@ -2,10 +2,15 @@ use super::Scorer;
use Result;
use core::SegmentReader;
+
+/// A Weight is the specialization of a Query
+/// for a given set of segments.
+///
+/// See [Query](./trait.Query.html).
pub trait Weight {
-
+ /// Returns the scorer for the given segment.
+ /// See [Query](./trait.Query.html).
fn scorer<'a>(&'a self, reader: &'a SegmentReader) -> Result<Box<Scorer + 'a>>;
-
}