summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Paul Masurel <paul.masurel@gmail.com> 2018-03-23 13:52:10 +0900
committer Paul Masurel <paul.masurel@gmail.com> 2018-03-23 13:52:10 +0900
commit 30914597771352d6171fa72f01a4539339c87873 (patch)
tree 4eb81387530b639dc98e147bcf69660f65bd453a
parent b7f8884246b15950c873fa48229ee794a9aa14bd (diff)
Fixed main bug. Unit test still not passing because of altered scoring
-rw-r--r-- src/core/inverted_index_reader.rs 25
-rw-r--r-- src/directory/read_only_source.rs 1
-rw-r--r-- src/postings/postings_writer.rs 1
-rw-r--r-- src/postings/serializer.rs 14
4 files changed, 23 insertions, 18 deletions
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index 0a95bf5..4a81b62 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -30,6 +30,7 @@ pub struct InvertedIndexReader {
positions_source: ReadOnlySource,
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
+ total_num_tokens: u64
}
impl InvertedIndexReader {
@@ -40,12 +41,16 @@ impl InvertedIndexReader {
delete_bitset_opt: Option<DeleteBitSet>,
record_option: IndexRecordOption,
) -> InvertedIndexReader {
+ let total_num_tokens_data = postings_source.slice(0, 8);
+ let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
+ let total_num_tokens = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
InvertedIndexReader {
termdict,
- postings_source,
+ postings_source: postings_source.slice_from(8),
positions_source,
delete_bitset_opt,
record_option,
+ total_num_tokens
}
}
@@ -55,13 +60,14 @@ impl InvertedIndexReader {
let record_option = field_type
.get_index_record_option()
.unwrap_or(IndexRecordOption::Basic);
- InvertedIndexReader::new(
- TermDictionaryImpl::empty(field_type),
- ReadOnlySource::empty(),
- ReadOnlySource::empty(),
- None,
+ InvertedIndexReader {
+ termdict: TermDictionaryImpl::empty(field_type),
+ postings_source: ReadOnlySource::empty(),
+ positions_source: ReadOnlySource::empty(),
+ delete_bitset_opt: None,
record_option,
- )
+ total_num_tokens: 0u64
+ }
}
/// Returns the term info associated with the term.
@@ -149,10 +155,7 @@ impl InvertedIndexReader {
/// Returns the total number of tokens recorded for all documents
/// (including deleted documents).
pub fn total_num_tokens(&self) -> u64 {
- let total_num_tokens_data = self.postings_source.slice(0, 8);
- let mut total_num_tokens_cursor = total_num_tokens_data.as_slice();
- let result = u64::deserialize(&mut total_num_tokens_cursor).unwrap_or(0u64);
- result
+ self.total_num_tokens
}
diff --git a/src/directory/read_only_source.rs b/src/directory/read_only_source.rs
index e7e5453..ec9e8ae 100644
--- a/src/directory/read_only_source.rs
+++ b/src/directory/read_only_source.rs
@@ -63,6 +63,7 @@ impl ReadOnlySource {
/// 1KB slice is remaining, the whole `500MBs`
/// are retained in memory.
pub fn slice(&self, from_offset: usize, to_offset: usize) -> ReadOnlySource {
+ assert!(from_offset <= to_offset, "Requested negative slice [{}..{}]", from_offset, to_offset);
match *self {
ReadOnlySource::Mmap(ref mmap_read_only) => {
let sliced_mmap = mmap_read_only.range(from_offset, to_offset - from_offset);
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index 698c2c7..054eeb1 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -240,6 +240,7 @@ impl<'a, Rec: Recorder + 'static> PostingsWriter for SpecializedPostingsWriter<'
recorder.new_doc(doc, heap);
}
recorder.record_position(position, heap);
+ self.add_num_tokens(1u32);
term_ord
}
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index 87d7b22..973038a 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -112,7 +112,7 @@ impl InvertedIndexSerializer {
/// the serialization of a specific field.
pub struct FieldSerializer<'a> {
term_dictionary_builder: TermDictionaryBuilderImpl<&'a mut CountingWriter<WritePtr>>,
- postings_serializer: PostingsSerializer<'a, WritePtr>,
+ postings_serializer: PostingsSerializer<&'a mut CountingWriter<WritePtr>>,
positions_serializer_opt: Option<PositionSerializer<&'a mut CountingWriter<WritePtr>>>,
current_term_info: TermInfo,
term_open: bool,
@@ -235,8 +235,8 @@ impl<'a> FieldSerializer<'a> {
}
}
-pub struct PostingsSerializer<'a, W: 'a + Write> {
- postings_write: &'a mut CountingWriter<W>,
+pub struct PostingsSerializer<W: Write> {
+ postings_write: CountingWriter<W>,
last_doc_id_encoded: u32,
block_encoder: BlockEncoder,
@@ -246,10 +246,10 @@ pub struct PostingsSerializer<'a, W: 'a + Write> {
termfreq_enabled: bool,
}
-impl<'a, W: 'a + Write> PostingsSerializer<'a, W> {
- pub fn new(write: &'a mut CountingWriter<W>, termfreq_enabled: bool) -> PostingsSerializer<W> {
+impl<W: Write> PostingsSerializer<W> {
+ pub fn new(write: W, termfreq_enabled: bool) -> PostingsSerializer<W> {
PostingsSerializer {
- postings_write: write,
+ postings_write: CountingWriter::wrap(write),
block_encoder: BlockEncoder::new(),
doc_ids: vec![],
@@ -310,7 +310,7 @@ impl<'a, W: 'a + Write> PostingsSerializer<'a, W> {
Ok(())
}
- fn close(self) -> io::Result<()> {
+ fn close(mut self) -> io::Result<()> {
self.postings_write.flush()
}