diff options
Diffstat (limited to 'src/postings/segment_postings.rs')
-rw-r--r-- | src/postings/segment_postings.rs | 81 |
1 files changed, 58 insertions, 23 deletions
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index b12c0b7..b189d02 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -4,7 +4,7 @@ use common::{BinarySerializable, VInt}; use docset::{DocSet, SkipResult}; use owned_read::OwnedRead; use positions::PositionReader; -use postings::compression::compressed_block_size; +use postings::compression::{compressed_block_size, AlignedBuffer}; use postings::compression::{BlockDecoder, VIntDecoder, COMPRESSION_BLOCK_SIZE}; use postings::serializer::PostingsSerializer; use postings::BlockSearcher; @@ -130,9 +130,11 @@ impl DocSet for SegmentPostings { // next needs to be called a first time to point to the correct element. #[inline] fn advance(&mut self) -> bool { - if self.position_computer.is_some() { + if self.position_computer.is_some() && self.cur < COMPRESSION_BLOCK_SIZE { let term_freq = self.term_freq() as usize; - self.position_computer.as_mut().unwrap().add_skip(term_freq); + if let Some(position_computer) = self.position_computer.as_mut() { + position_computer.add_skip(term_freq); + } } self.cur += 1; if self.cur >= self.block_cursor.block_len() { @@ -167,7 +169,6 @@ impl DocSet for SegmentPostings { // skip blocks until one that might contain the target // check if we need to go to the next block - let need_positions = self.position_computer.is_some(); let mut sum_freqs_skipped: u32 = 0; if !self .block_cursor @@ -181,7 +182,7 @@ impl DocSet for SegmentPostings { // we are not in the right block. // // First compute all of the freqs skipped from the current block. - if need_positions { + if self.position_computer.is_some() { sum_freqs_skipped = self.block_cursor.freqs()[self.cur..].iter().sum(); match self.block_cursor.skip_to(target) { BlockSegmentPostingsSkipResult::Success(block_skip_freqs) => { @@ -200,24 +201,21 @@ impl DocSet for SegmentPostings { self.cur = 0; } + let cur = self.cur; + // we're in the right block now, start with an exponential search - let block_docs = self.block_cursor.docs(); + let (output, len) = self.block_cursor.docs_aligned(); let new_cur = self .block_searcher - .search_in_block(&block_docs, self.cur, target); - if need_positions { - sum_freqs_skipped += self.block_cursor.freqs()[self.cur..new_cur] - .iter() - .sum::<u32>(); - self.position_computer - .as_mut() - .unwrap() - .add_skip(sum_freqs_skipped as usize); + .search_in_block(&output, len, cur, target); + if let Some(position_computer) = self.position_computer.as_mut() { + sum_freqs_skipped += self.block_cursor.freqs()[cur..new_cur].iter().sum::<u32>(); + position_computer.add_skip(sum_freqs_skipped as usize); } self.cur = new_cur; // `doc` is now the first element >= `target` - let doc = block_docs[new_cur]; + let doc = output.0[new_cur]; debug_assert!(doc >= target); if doc == target { SkipResult::Reached @@ -227,12 +225,16 @@ impl DocSet for SegmentPostings { } /// Return the current document's `DocId`. + /// + /// # Panics + /// + /// Will panics if called without having called advance before. #[inline] fn doc(&self) -> DocId { let docs = self.block_cursor.docs(); debug_assert!( self.cur < docs.len(), - "Have you forgotten to call `.advance()` at least once before calling .doc()." + "Have you forgotten to call `.advance()` at least once before calling `.doc()` ." ); docs[self.cur] } @@ -264,17 +266,33 @@ impl HasLen for SegmentPostings { } impl Postings for SegmentPostings { + /// Returns the frequency associated to the current document. + /// If the schema is set up so that no frequency have been encoded, + /// this method should always return 1. + /// + /// # Panics + /// + /// Will panics if called without having called advance before. fn term_freq(&self) -> u32 { + debug_assert!( + // Here we do not use the len of `freqs()` + // because it is actually ok to request for the freq of doc + // even if no frequency were encoded for the field. + // + // In that case we hit the block just as if the frequency had been + // decoded. The block is simply prefilled by the value 1. + self.cur < COMPRESSION_BLOCK_SIZE, + "Have you forgotten to call `.advance()` at least once before calling \ + `.term_freq()`." + ); self.block_cursor.freq(self.cur) } fn positions_with_offset(&mut self, offset: u32, output: &mut Vec<u32>) { - if self.position_computer.is_some() { - output.resize(self.term_freq() as usize, 0u32); - self.position_computer - .as_mut() - .unwrap() - .positions_with_offset(offset, &mut output[..]) + let term_freq = self.term_freq() as usize; + if let Some(position_comp) = self.position_computer.as_mut() { + output.resize(term_freq, 0u32); + position_comp.positions_with_offset(offset, &mut output[..]); } else { output.clear(); } @@ -396,6 +414,10 @@ impl BlockSegmentPostings { self.doc_decoder.output_array() } + pub(crate) fn docs_aligned(&self) -> (&AlignedBuffer, usize) { + self.doc_decoder.output_aligned() + } + /// Return the document at index `idx` of the block. #[inline] pub fn doc(&self, idx: usize) -> u32 { @@ -592,6 +614,7 @@ mod tests { use common::HasLen; use core::Index; use docset::DocSet; + use postings::postings::Postings; use schema::IndexRecordOption; use schema::Schema; use schema::Term; @@ -609,6 +632,18 @@ mod tests { } #[test] + #[should_panic(expected = "Have you forgotten to call `.advance()`")] + fn test_panic_if_doc_called_before_advance() { + SegmentPostings::empty().doc(); + } + + #[test] + #[should_panic(expected = "Have you forgotten to call `.advance()`")] + fn test_panic_if_freq_called_before_advance() { + SegmentPostings::empty().term_freq(); + } + + #[test] fn test_empty_block_segment_postings() { let mut postings = BlockSegmentPostings::empty(); assert!(!postings.advance()); |