diff options
author | Paul Masurel <paul.masurel@gmail.com> | 2018-03-23 13:52:10 +0900 |
---|---|---|
committer | Paul Masurel <paul.masurel@gmail.com> | 2018-03-23 13:52:10 +0900 |
commit | 30914597771352d6171fa72f01a4539339c87873 (patch) | |
tree | 4eb81387530b639dc98e147bcf69660f65bd453a | |
parent | b7f8884246b15950c873fa48229ee794a9aa14bd (diff) |
Fixed main bug. Unit test still not passing because of altered scoring
-rw-r--r-- | src/core/inverted_index_reader.rs | 25 | ||||
-rw-r--r-- | src/directory/read_only_source.rs | 1 | ||||
-rw-r--r-- | src/postings/postings_writer.rs | 1 | ||||
-rw-r--r-- | src/postings/serializer.rs | 14 |
4 files changed, 23 insertions, 18 deletions
```diff
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index 0a95bf5..4a81b62 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -30,6 +30,7 @@ pub struct InvertedIndexReader {
     positions_source: ReadOnlySource,
     delete_bitset_opt: Option<DeleteBitSet>,
     record_option: IndexRecordOption,
+    total_num_tokens: u64
 }
 
 impl InvertedIndexReader {
@@ -40,12 +41,16 @@ impl InvertedIndexReader {
         delete_bitset_opt: Option<DeleteBitSet>,
         record_option: IndexRecordOption,
     ) -> InvertedIndexReader {
+        let total_num_tokens_data = postings_source.slice(0, 8);
+        let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
+        let total_num_tokens = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
         InvertedIndexReader {
             termdict,
-            postings_source,
+            postings_source: postings_source.slice_from(8),
             positions_source,
             delete_bitset_opt,
             record_option,
+            total_num_tokens
         }
     }
 
@@ -55,13 +60,14 @@ impl InvertedIndexReader {
         let record_option = field_type
             .get_index_record_option()
             .unwrap_or(IndexRecordOption::Basic);
-        InvertedIndexReader::new(
-            TermDictionaryImpl::empty(field_type),
-            ReadOnlySource::empty(),
-            ReadOnlySource::empty(),
-            None,
+        InvertedIndexReader {
+            termdict: TermDictionaryImpl::empty(field_type),
+            postings_source: ReadOnlySource::empty(),
+            positions_source: ReadOnlySource::empty(),
+            delete_bitset_opt: None,
             record_option,
-        )
+            total_num_tokens: 0u64
+        }
     }
 
     /// Returns the term info associated with the term.
@@ -149,10 +155,7 @@ impl InvertedIndexReader {
     /// Returns the total number of tokens recorded for all documents
     /// (including deleted documents).
     pub fn total_num_tokens(&self) -> u64 {
-        let total_num_tokens_data = self.postings_source.slice(0, 8);
-        let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
-        let result = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
-        result
+        self.total_num_tokens
     }
 
 
diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs
index e7e5453..ec9e8ae 100644
--- a/src/directory/read_only_source.rs
+++ b/src/directory/read_only_source.rs
@@ -63,6 +63,7 @@ impl ReadOnlySource {
     /// 1KB slice is remaining, the whole `500MBs`
     /// are retained in memory.
     pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
+        assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset);
         match *self {
             ReadOnlySource::Mmap(ref mmap_read_only) => {
                 let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index 698c2c7..054eeb1 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -240,6 +240,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'
             recorder.new_doc(doc, heap);
         }
         recorder.record_position(position, heap);
+        self.add_num_tokens(1u32);
         term_ord
     }
 
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index 87d7b22..973038a 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -112,7 +112,7 @@ impl InvertedIndexSerializer {
 /// the serialization of a specific field.
 pub struct FieldSerializer<'a> {
     term_dictionary_builder: TermDictionaryBuilderImpl<&'a mut CountingWriter<WritePtr>>,
-    postings_serializer: PostingsSerializer<'a, WritePtr>,
+    postings_serializer: PostingsSerializer<&'a mut CountingWriter<WritePtr>>,
     positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<WritePtr>>>,
     current_term_info: TermInfo,
     term_open: bool,
@@ -235,8 +235,8 @@ impl<'a> FieldSerializer<'a> {
     }
 }
 
-pub struct PostingsSerializer<'a, W: 'a + Write> {
-    postings_write: &'a mut CountingWriter<W>,
+pub struct PostingsSerializer<W: Write> {
+    postings_write: CountingWriter<W>,
     last_doc_id_encoded: u32,
 
     block_encoder: BlockEncoder,
@@ -246,10 +246,10 @@ pub struct PostingsSerializer<'a, W: 'a + Write> {
     termfreq_enabled: bool,
 }
 
-impl<'a, W: 'a + Write> PostingsSerializer<'a, W> {
-    pub fn new(write: &'a mut CountingWriter<W>, termfreq_enabled: bool) -> PostingsSerializer<W> {
+impl<W: Write> PostingsSerializer<W> {
+    pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
         PostingsSerializer {
-            postings_write: write,
+            postings_write: CountingWriter::wrap(write),
 
             block_encoder: BlockEncoder::new(),
             doc_ids: vec![],
@@ -310,7 +310,7 @@ impl<'a, W: 'a + Write> PostingsSerializer<'a, W> {
         Ok(())
     }
 
-    fn close(self) -> io::Result<()> {
+    fn close(mut self) -> io::Result<()> {
         self.postings_write.flush()
     }
 
```