diff options
28 files changed, 315 insertions, 225 deletions
diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs index b259bd3..f625e07 100644 --- a/src/common/bitpacker.rs +++ b/src/common/bitpacker.rs @@ -37,7 +37,6 @@ pub struct BitPacker { mini_buffer: u64, mini_buffer_written: usize, num_bits: usize, - written_size: usize, } impl BitPacker { @@ -46,7 +45,6 @@ impl BitPacker { mini_buffer: 0u64, mini_buffer_written: 0, num_bits: num_bits, - written_size: 0, } } @@ -54,14 +52,14 @@ impl BitPacker { let val_u64 = val as u64; if self.mini_buffer_written + self.num_bits > 64 { self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32); - self.written_size += self.mini_buffer.serialize(output)?; + self.mini_buffer.serialize(output)?; self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32); self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64; } else { self.mini_buffer |= val_u64 << self.mini_buffer_written; self.mini_buffer_written += self.num_bits; if self.mini_buffer_written == 64 { - self.written_size += self.mini_buffer.serialize(output)?; + self.mini_buffer.serialize(output)?; self.mini_buffer_written = 0; self.mini_buffer = 0u64; } @@ -74,18 +72,16 @@ impl BitPacker { let num_bytes = (self.mini_buffer_written + 7) / 8; let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) }; output.write_all(&arr[..num_bytes])?; - self.written_size += num_bytes; self.mini_buffer_written = 0; } Ok(()) } - pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<usize> { + pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> { self.flush(output)?; // Padding the write file to simplify reads. output.write_all(&[0u8; 7])?; - self.written_size += 7; - Ok(self.written_size) + Ok(()) } } @@ -163,9 +159,8 @@ mod test { for &val in &vals { bitpacker.write(val, &mut data).unwrap(); } - let num_bytes = bitpacker.close(&mut data).unwrap(); - assert_eq!(num_bytes, (num_bits * len + 7) / 8 + 7); - assert_eq!(data.len(), num_bytes); + bitpacker.close(&mut data).unwrap(); + assert_eq!(data.len(), (num_bits * len + 7) / 8 + 7); let bitunpacker = BitUnpacker::new(data, num_bits); for (i, val) in vals.iter().enumerate() { assert_eq!(bitunpacker.get(i), *val); diff --git a/src/termdict/streamdict/counting_writer.rs b/src/common/counting_writer.rs index db13e36..db13e36 100644 --- a/src/termdict/streamdict/counting_writer.rs +++ b/src/common/counting_writer.rs diff --git a/src/common/mod.rs b/src/common/mod.rs index e8e9fac..0af9d24 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -1,6 +1,7 @@ mod serialize; mod timer; mod vint; +mod counting_writer; pub mod bitpacker; pub use self::serialize::BinarySerializable; @@ -8,6 +9,7 @@ pub use self::timer::Timing; pub use self::timer::TimerTree; pub use self::timer::OpenTimer; pub use self::vint::VInt; +pub use self::counting_writer::CountingWriter; use std::io; diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 471ac3a..ee86247 100644 --- a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -6,33 +6,35 @@ use std::io::Read; use std::io; use common::VInt; + + pub trait BinarySerializable: fmt::Debug + Sized { - fn serialize(&self, writer: &mut Write) -> io::Result<usize>; - fn deserialize(reader: &mut Read) -> io::Result<Self>; + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>; + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>; } impl BinarySerializable for () { - fn serialize(&self, _: &mut Write) -> io::Result<usize> { - Ok(0) + fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> { + Ok(()) } - fn deserialize(_: &mut Read) -> io::Result<Self> { + fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> { Ok(()) } } impl<T: BinarySerializable> BinarySerializable for Vec<T> { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { - let mut total_size = try!(VInt(self.len() as u64).serialize(writer)); + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + VInt(self.len() as u64).serialize(writer)?; for it in self { - total_size += try!(it.serialize(writer)); + it.serialize(writer)?; } - Ok(total_size) + Ok(()) } - fn deserialize(reader: &mut Read) -> io::Result<Vec<T>> { - let num_items = try!(VInt::deserialize(reader)).val(); + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> { + let num_items = VInt::deserialize(reader)?.val(); let mut items: Vec<T> = Vec::with_capacity(num_items as usize); for _ in 0..num_items { - let item = try!(T::deserialize(reader)); + let item = T::deserialize(reader)?; items.push(item); } Ok(items) @@ -41,69 +43,67 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> { impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) { - fn serialize(&self, write: &mut Write) -> io::Result<usize> { - Ok(try!(self.0.serialize(write)) + try!(self.1.serialize(write))) + fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> { + self.0.serialize(write)?; + self.1.serialize(write) } - fn deserialize(reader: &mut Read) -> io::Result<Self> { - Ok((try!(Left::deserialize(reader)), try!(Right::deserialize(reader)))) + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { + Ok((Left::deserialize(reader)?, Right::deserialize(reader)?)) } } impl BinarySerializable for u32 { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { - writer.write_u32::<Endianness>(*self).map(|_| 4) + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + writer.write_u32::<Endianness>(*self) } - fn deserialize(reader: &mut Read) -> io::Result<u32> { + fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> { reader.read_u32::<Endianness>() } } impl BinarySerializable for u64 { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { - writer.write_u64::<Endianness>(*self).map(|_| 8) + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + writer.write_u64::<Endianness>(*self) } - fn deserialize(reader: &mut Read) -> io::Result<u64> { + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { reader.read_u64::<Endianness>() } } impl BinarySerializable for i64 { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { - writer.write_i64::<Endianness>(*self).map(|_| 8) + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + writer.write_i64::<Endianness>(*self) } - fn deserialize(reader: &mut Read) -> io::Result<i64> { + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { reader.read_i64::<Endianness>() } } impl BinarySerializable for u8 { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { - try!(writer.write_u8(*self)); - Ok(1) + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + writer.write_u8(*self) } - fn deserialize(reader: &mut Read) -> io::Result<u8> { + fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> { reader.read_u8() } } impl BinarySerializable for String { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { let data: &[u8] = self.as_bytes(); - let mut size = try!(VInt(data.len() as u64).serialize(writer)); - size += data.len(); - try!(writer.write_all(data)); - Ok(size) + VInt(data.len() as u64).serialize(writer)?; + writer.write_all(data) } - fn deserialize(reader: &mut Read) -> io::Result<String> { - let string_length = try!(VInt::deserialize(reader)).val() as usize; + fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> { + let string_length = VInt::deserialize(reader)?.val() as usize; let mut result = String::with_capacity(string_length); - try!(reader - .take(string_length as u64) - .read_to_string(&mut result)); + reader + .take(string_length as u64) + .read_to_string(&mut result)?; Ok(result) } } @@ -117,9 +117,8 @@ mod test { fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) { let mut buffer: Vec<u8> = Vec::new(); - if num_bytes != 0 { - assert_eq!(v.serialize(&mut buffer).unwrap(), num_bytes); + v.serialize(&mut buffer).unwrap(); assert_eq!(buffer.len(), num_bytes); } else { v.serialize(&mut buffer).unwrap(); diff --git a/src/common/vint.rs b/src/common/vint.rs index 0563d8f..39653e8 100644 --- a/src/common/vint.rs +++ b/src/common/vint.rs @@ -16,27 +16,25 @@ impl VInt { } impl BinarySerializable for VInt { - fn serialize(&self, writer: &mut Write) -> io::Result<usize> { + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { let mut remaining = self.0; - let mut written: usize = 0; let mut buffer = [0u8; 10]; + let mut i = 0; loop { let next_byte: u8 = (remaining % 128u64) as u8; remaining /= 128u64; if remaining == 0u64 { - buffer[written] = next_byte | 128u8; - written += 1; - break; + buffer[i] = next_byte | 128u8; + return writer.write_all(&buffer[0..i + 1]); } else { - buffer[written] = next_byte; - written += 1; + buffer[i] = next_byte; } + i += 1; } - try!(writer.write_all(&buffer[0..written])); - Ok(written) + } - fn deserialize(reader: &mut Read) -> io::Result<Self> { + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { let mut bytes = reader.bytes(); let mut result = 0u64; let mut shift = 0u64; diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs index 34c5d8a..eaa439d 100644 --- a/src/datastruct/skip/skiplist_builder.rs +++ b/src/datastruct/skip/skiplist_builder.rs @@ -18,7 +18,7 @@ impl<T: BinarySerializable> LayerBuilder<T> { } fn write(&self, output: &mut Write) -> Result<(), io::Error> { - try!(output.write_all(&self.buffer)); + output.write_all(&self.buffer)?; Ok(()) } @@ -36,8 +36,8 @@ impl<T: BinarySerializable> LayerBuilder<T> { self.remaining -= 1; self.len += 1; let offset = self.written_size() as u32; - try!(doc_id.serialize(&mut self.buffer)); - try!(value.serialize(&mut self.buffer)); + doc_id.serialize(&mut self.buffer)?; + value.serialize(&mut self.buffer)?; Ok(if self.remaining == 0 { self.remaining = self.period; Some((doc_id, offset)) @@ -89,7 +89,7 @@ impl<T: BinarySerializable> SkipListBuilder<T> { } } - pub fn write<W: Write>(self, output: &mut Write) -> io::Result<()> { + pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> { let mut size: u32 = 0; let mut layer_sizes: Vec<u32> = Vec::new(); size += self.data_layer.buffer.len() as u32; @@ -98,10 +98,10 @@ impl<T: BinarySerializable> SkipListBuilder<T> { size += layer.buffer.len() as u32; layer_sizes.push(size); } - try!(layer_sizes.serialize(output)); - try!(self.data_layer.write(output)); + layer_sizes.serialize(output)?; + self.data_layer.write(output)?; for layer in self.skip_layers.iter().rev() { - try!(layer.write(output)); + layer.write(output)?; } Ok(()) } diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index ee1fc1c..7c01d0a 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -12,6 +12,7 @@ use fastfield::FastFieldsWriter; use common::bitpacker::compute_num_bits; use common::bitpacker::BitUnpacker; use schema::FieldType; +use error::ResultExt; use common; use owning_ref::OwningRef; @@ -125,9 +126,20 @@ impl From<Vec<u64>> for U64FastFieldReader { fast_field_writers.serialize(&mut serializer).unwrap(); serializer.close().unwrap(); } - let source = directory.open_read(path).unwrap(); - let fast_field_readers = FastFieldsReader::from_source(source).unwrap(); - fast_field_readers.open_reader(field).unwrap() + directory + .open_read(path) + .chain_err(|| "Failed to open the file") + .and_then(|source| { + FastFieldsReader::from_source(source) + .chain_err(|| "Failed to read the file.") + }) + .and_then(|ff_readers| { + ff_readers + .open_reader(field) + .ok_or_else(|| "Failed to find the requested field".into()) + }) + .expect("This should never happen, please report.") + } } diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs index 7f97b3b..ef6ffed 100644 --- a/src/fastfield/serializer.rs +++ b/src/fastfield/serializer.rs @@ -2,9 +2,9 @@ use common::BinarySerializable; use directory::WritePtr; use schema::Field; use common::bitpacker::{compute_num_bits, BitPacker}; +use common::CountingWriter; use std::io::{self, Write, Seek, SeekFrom}; - /// `FastFieldSerializer` is in charge of serializing /// fastfields on disk. /// @@ -26,8 +26,7 @@ use std::io::{self, Write, Seek, SeekFrom}; /// * `close_field()` /// * `close()` pub struct FastFieldSerializer { - write: WritePtr, - written_size: usize, + write: CountingWriter<WritePtr>, fields: Vec<(Field, u32)>, min_value: u64, field_open: bool, @@ -37,12 +36,12 @@ pub struct FastFieldSerializer { impl FastFieldSerializer { /// Constructor - pub fn new(mut write: WritePtr) -> io::Result<FastFieldSerializer> { + pub fn new(write: WritePtr) -> io::Result<FastFieldSerializer> { // just making room for the pointer to header. - let written_size: usize = try!(0u32.serialize(&mut write)); + let mut counting_writer = CountingWriter::wrap(write); + 0u32.serialize(&mut counting_writer)?; Ok(FastFieldSerializer { - write: write, - written_size: written_size, + write: counting_writer, fields: Vec::new(), min_value: 0, field_open: false, @@ -61,11 +60,11 @@ impl FastFieldSerializer { } self.min_value = min_value; self.field_open = true; - self.fields.push((field, self.written_size as u32)); - let write: &mut Write = &mut self.write; - self.written_size += try!(min_value.serialize(write)); + self.fields.push((field, self.write.written_bytes() as u32)); + let write = &mut self.write; + min_value.serialize(write)?; let amplitude = max_value - min_value; - self.written_size += try!(amplitude.serialize(write)); + amplitude.serialize(write)?; let num_bits = compute_num_bits(amplitude); self.bit_packer = BitPacker::new(num_bits as usize); Ok(()) @@ -88,7 +87,7 @@ impl FastFieldSerializer { // adding some padding to make sure we // can read the last elements with our u64 // cursor - self.written_size += self.bit_packer.close(&mut self.write)?; + self.bit_packer.close(&mut self.write)?; Ok(()) } @@ -96,15 +95,16 @@ impl FastFieldSerializer { /// Closes the serializer /// /// After this call the data must be persistently save on disk. - pub fn close(mut self) -> io::Result<usize> { + pub fn close(self) -> io::Result<usize> { if self.field_open { return Err(io::Error::new(io::ErrorKind::Other, "Last field not closed")); } - let header_offset: usize = self.written_size; - self.written_size += try!(self.fields.serialize(&mut self.write)); - try!(self.write.seek(SeekFrom::Start(0))); - try!((header_offset as u32).serialize(&mut self.write)); - try!(self.write.flush()); - Ok(self.written_size) + let header_offset: usize = self.write.written_bytes() as usize; + let (mut write, written_size) = self.write.finish()?; + self.fields.serialize(&mut write)?; + write.seek(SeekFrom::Start(0))?; + (header_offset as u32).serialize(&mut write)?; + write.flush()?; + Ok(written_size) } } diff --git a/src/postings/docset.rs b/src/postings/docset.rs index e28319f..ea4211a 100644 --- a/src/postings/docset.rs +++ b/src/postings/docset.rs @@ -65,6 +65,10 @@ pub trait DocSet { None } } + + /// Returns a best-effort hint of the + /// length of the docset. + fn size_hint(&self) -> usize; } @@ -83,6 +87,11 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> { let unboxed: &TDocSet = self.borrow(); unboxed.doc() } + + fn size_hint(&self) -> usize { + let unboxed: &TDocSet = self.borrow(); + unboxed.size_hint() + } } impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet { @@ -100,4 +109,9 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet { let unref: &TDocSet = *self; unref.doc() } + + fn size_hint(&self) -> usize { + let unref: &TDocSet = *self; + unref.size_hint() + } } diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs index e4e4c23..06bc0b9 100644 --- a/src/postings/intersection.rs +++ b/src/postings/intersection.rs @@ -10,12 +10,13 @@ pub struct IntersectionDocSet<TDocSet: DocSet> { } impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> { - fn from(docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> { + fn from(mut docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> { assert!(docsets.len() >= 2); + docsets.sort_by_key(|docset| docset.size_hint()); IntersectionDocSet { docsets: docsets, finished: false, - doc: DocId::max_value(), + doc: 0u32, } } } @@ -31,37 +32,51 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> { impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> { + fn size_hint(&self) -> usize { + self.docsets + .iter() + .map(|docset| docset.size_hint()) + .min() + .unwrap() // safe as docsets cannot be empty. + } + + #[allow(never_loop)] fn advance(&mut self) -> bool { if self.finished { return false; } - let num_docsets = self.docsets.len(); - let mut count_matching = 0; - let mut doc_candidate = 0; - let mut ord = 0; - loop { - let mut doc_set = &mut self.docsets[ord]; - match doc_set.skip_next(doc_candidate) { - SkipResult::Reached => { - count_matching += 1; - if count_matching == num_docsets { - self.doc = doc_candidate; - return true; + + let mut candidate_doc = self.doc; + let mut candidate_ord = self.docsets.len(); + + 'outer: loop { + + for (ord, docset) in self.docsets.iter_mut().enumerate() { + if ord != candidate_ord { + // `candidate_ord` is already at the + // right position. + // + // Calling `skip_next` would advance this docset + // and miss it. + match docset.skip_next(candidate_doc) { + SkipResult::Reached => {} + SkipResult::OverStep => { + // this is not in the intersection, + // let's update our candidate. + candidate_doc = docset.doc(); + candidate_ord = ord; + continue 'outer; + } + SkipResult::End => { + self.finished = true; + return false; + } } } - SkipResult::End => { - self.finished = true; - return false; - } - SkipResult::OverStep => { - count_matching = 1; - doc_candidate = doc_set.doc(); - } - } - ord += 1; - if ord == num_docsets { - ord = 0; } + + self.doc = candidate_doc; + return true; } } @@ -69,3 +84,51 @@ impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> { self.doc } } + + +#[cfg(test)] +mod tests { + + use postings::{DocSet, VecPostings, IntersectionDocSet}; + + #[test] + fn test_intersection() { + { + let left = VecPostings::from(vec![1, 3, 9]); + let right = VecPostings::from(vec![3, 4, 9, 18]); + let mut intersection = IntersectionDocSet::from(vec![left, right]); + assert!(intersection.advance()); + assert_eq!(intersection.doc(), 3); + assert!(intersection.advance()); + assert_eq!(intersection.doc(), 9); + assert!(!intersection.advance()); + } + { + let a = VecPostings::from(vec![1, 3, 9]); + let b = VecPostings::from(vec![3, 4, 9, 18]); + let c = VecPostings::from(vec![1, 5, 9, 111]); + let mut intersection = IntersectionDocSet::from(vec![a, b, c]); + assert!(intersection.advance()); + assert_eq!(intersection.doc(), 9); + assert!(!intersection.advance()); + } + } + + #[test] + fn test_intersection_zero() { + let left = VecPostings::from(vec![0]); + let right = VecPostings::from(vec![0]); + let mut intersection = IntersectionDocSet::from(vec![left, right]); + assert!(intersection.advance()); + assert_eq!(intersection.doc(), 0); + } + + #[test] + fn test_intersection_empty() { + let a = VecPostings::from(vec![1, 3]); + let b = VecPostings::from(vec![1, 4]); + let c = VecPostings::from(vec![3, 9]); + let mut intersection = IntersectionDocSet::from(vec![a, b, c]); + assert!(!intersection.advance()); + } +} diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 647a7ae..5de1f7a 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -373,29 +373,6 @@ mod tests { } } - #[test] - fn test_intersection() { - { - let left = VecPostings::from(vec![1, 3, 9]); - let right = VecPostings::from(vec![3, 4, 9, 18]); - let mut intersection = IntersectionDocSet::from(vec![left, right]); - assert!(intersection.advance()); - assert_eq!(intersection.doc(), 3); - assert!(intersection.advance()); - assert_eq!(intersection.doc(), 9); - assert!(!intersection.advance()); - } - { - let a = VecPostings::from(vec![1, 3, 9]); - let b = VecPostings::from(vec![3, 4, 9, 18]); - let c = VecPostings::from(vec![1, 5, 9, 111]); - let mut intersection = IntersectionDocSet::from(vec![a, b, c]); - assert!(intersection.advance()); - assert_eq!(intersection.doc(), 9); - assert!(!intersection.advance()); - } - } - lazy_static! { static ref TERM_A: Term = { @@ -406,6 +383,14 @@ mod tests { let field = Field(0); Term::from_field_text(field, "b") }; + static ref TERM_C: Term = { + let field = Field(0); + Term::from_field_text(field, "c") + }; + static ref TERM_D: Term = { + let field = Field(0); + Term::from_field_text(field, "d") + }; static ref INDEX: Index = { let mut schema_builder = SchemaBuilder::default(); let text_field = schema_builder.add_text_field("text", STRING); @@ -415,25 +400,23 @@ mod tests { let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed); let index = Index::create_in_ram(schema); - let mut count_a = 0; - let mut count_b = 0; - let posting_list_size = 100_000; + let posting_list_size = 1_000_000; { let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap(); - for _ in 0 .. { - if count_a >= posting_list_size && - count_b >= posting_list_size { - break; - } + for _ in 0 .. posting_list_size { let mut doc = Document::default(); - if count_a < posting_list_size && rng.gen_weighted_bool(15) { - count_a += 1; + if rng.gen_weighted_bool(15) { doc.add_text(text_field, "a"); } - if count_b < posting_list_size && rng.gen_weighted_bool(10) { - count_b += 1; + if rng.gen_weighted_bool(10) { doc.add_text(text_field, "b"); } + if rng.gen_weighted_bool(5) { + doc.add_text(text_field, "c"); + } + if rng.gen_weighted_bool(1) { + doc.add_text(text_field, "d"); + } index_writer.add_document(doc); } assert!(index_writer.commit().is_ok()); @@ -467,8 +450,16 @@ mod tests { let segment_postings_b = segment_reader .read_postings(&*TERM_B, SegmentPostingsOption::NoFreq) .unwrap(); + let segment_postings_c = segment_reader + .read_postings(&*TERM_C, SegmentPostingsOption::NoFreq) + .unwrap(); + let segment_postings_d = segment_reader + .read_postings(&*TERM_D, SegmentPostingsOption::NoFreq) + .unwrap(); let mut intersection = IntersectionDocSet::from(vec![segment_postings_a, - segment_postings_b]); + segment_postings_b, + segment_postings_c, + segment_postings_d]); while intersection.advance() {} }); } diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs index d6386d1..f429226 100644 --- a/src/postings/segment_postings.rs +++ b/src/postings/segment_postings.rs @@ -152,6 +152,10 @@ impl<'a> DocSet for SegmentPostings<'a> { } } + fn size_hint(&self) -> usize { + self.len() + } + #[inline] fn doc(&self) -> DocId { let docs = self.block_cursor.docs(); diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index d5e72fa..4a3078a 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -10,12 +10,11 @@ use directory::WritePtr; use compression::{NUM_DOCS_PER_BLOCK, BlockEncoder, CompositeEncoder}; use DocId; use core::Segment; -use std::io; -use core::SegmentComponent; -use std::io::Write; +use std::io::{self, Write}; use compression::VIntEncoder; use common::VInt; use common::BinarySerializable; +use common::CountingWriter; use termdict::TermDictiona |