-rw-r--r--  src/common/bitpacker.rs                                       |  17
-rw-r--r--  src/common/counting_writer.rs (renamed from src/termdict/streamdict/counting_writer.rs) |   0
-rw-r--r--  src/common/mod.rs                                             |   2
-rw-r--r--  src/common/serialize.rs                                       |  81
-rw-r--r--  src/common/vint.rs                                            |  18
-rw-r--r--  src/datastruct/skip/skiplist_builder.rs                       |  14
-rw-r--r--  src/fastfield/reader.rs                                       |  18
-rw-r--r--  src/fastfield/serializer.rs                                   |  38
-rw-r--r--  src/postings/docset.rs                                        |  14
-rw-r--r--  src/postings/intersection.rs                                  | 115
-rw-r--r--  src/postings/mod.rs                                           |  63
-rw-r--r--  src/postings/segment_postings.rs                              |   4
-rw-r--r--  src/postings/serializer.rs                                    |  51
-rw-r--r--  src/postings/term_info.rs                                     |  10
-rw-r--r--  src/postings/vec_postings.rs                                  |   4
-rw-r--r--  src/query/boolean_query/boolean_scorer.rs                     |  12
-rw-r--r--  src/query/phrase_query/phrase_scorer.rs                       |   4
-rw-r--r--  src/query/scorer.rs                                           |   4
-rw-r--r--  src/query/term_query/term_scorer.rs                           |   5
-rw-r--r--  src/schema/field.rs                                           |   4
-rw-r--r--  src/schema/field_value.rs                                     |   7
-rw-r--r--  src/schema/value.rs                                           |  18
-rw-r--r--  src/store/writer.rs                                           |  28
-rw-r--r--  src/termdict/merger.rs                                        |   2
-rw-r--r--  src/termdict/mod.rs                                           |   2
-rw-r--r--  src/termdict/streamdict/mod.rs                                |   2
-rw-r--r--  src/termdict/streamdict/streamer.rs                           |   1
-rw-r--r--  src/termdict/streamdict/termdict.rs                           |   2
28 files changed, 315 insertions, 225 deletions
diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs
index b259bd3..f625e07 100644
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -37,7 +37,6 @@ pub struct BitPacker {
mini_buffer: u64,
mini_buffer_written: usize,
num_bits: usize,
- written_size: usize,
}
impl BitPacker {
@@ -46,7 +45,6 @@ impl BitPacker {
mini_buffer: 0u64,
mini_buffer_written: 0,
num_bits: num_bits,
- written_size: 0,
}
}
@@ -54,14 +52,14 @@ impl BitPacker {
let val_u64 = val as u64;
if self.mini_buffer_written + self.num_bits > 64 {
self.mini_buffer |= val_u64.wrapping_shl(self.mini_buffer_written as u32);
- self.written_size += self.mini_buffer.serialize(output)?;
+ self.mini_buffer.serialize(output)?;
self.mini_buffer = val_u64.wrapping_shr((64 - self.mini_buffer_written) as u32);
self.mini_buffer_written = self.mini_buffer_written + (self.num_bits as usize) - 64;
} else {
self.mini_buffer |= val_u64 << self.mini_buffer_written;
self.mini_buffer_written += self.num_bits;
if self.mini_buffer_written == 64 {
- self.written_size += self.mini_buffer.serialize(output)?;
+ self.mini_buffer.serialize(output)?;
self.mini_buffer_written = 0;
self.mini_buffer = 0u64;
}
@@ -74,18 +72,16 @@ impl BitPacker {
let num_bytes = (self.mini_buffer_written + 7) / 8;
let arr: [u8; 8] = unsafe { mem::transmute::<u64, [u8; 8]>(self.mini_buffer) };
output.write_all(&arr[..num_bytes])?;
- self.written_size += num_bytes;
self.mini_buffer_written = 0;
}
Ok(())
}
- pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<usize> {
+ pub fn close<TWrite: Write>(&mut self, output: &mut TWrite) -> io::Result<()> {
self.flush(output)?;
// Padding the written file to simplify reads.
output.write_all(&[0u8; 7])?;
- self.written_size += 7;
- Ok(self.written_size)
+ Ok(())
}
}
@@ -163,9 +159,8 @@ mod test {
for &val in &vals {
bitpacker.write(val, &mut data).unwrap();
}
- let num_bytes = bitpacker.close(&mut data).unwrap();
- assert_eq!(num_bytes, (num_bits * len + 7) / 8 + 7);
- assert_eq!(data.len(), num_bytes);
+ bitpacker.close(&mut data).unwrap();
+ assert_eq!(data.len(), (num_bits * len + 7) / 8 + 7);
let bitunpacker = BitUnpacker::new(data, num_bits);
for (i, val) in vals.iter().enumerate() {
assert_eq!(bitunpacker.get(i), *val);
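
Since `close` no longer reports a byte count, a caller that needs it can track the figure itself, for example with the `CountingWriter` that this patch moves into `common`. A minimal sketch, not part of the patch, assuming `BitPacker::write` takes a `u64` plus any `Write` and that `written_bytes()` returns a `usize`:

    use std::io;
    use common::CountingWriter;
    use common::bitpacker::BitPacker;

    fn pack_all(vals: &[u64], num_bits: usize) -> io::Result<usize> {
        // Count bytes on the caller side instead of relying on `close`'s return value.
        let mut output = CountingWriter::wrap(Vec::new());
        let mut bitpacker = BitPacker::new(num_bits);
        for &val in vals {
            bitpacker.write(val, &mut output)?;
        }
        bitpacker.close(&mut output)?;
        Ok(output.written_bytes())
    }
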
diff --git a/src/termdict/streamdict/counting_writer.rs b/src/common/counting_writer.rs
index db13e36..db13e36 100644
--- a/src/termdict/streamdict/counting_writer.rs
+++ b/src/common/counting_writer.rs
diff --git a/src/common/mod.rs b/src/common/mod.rs
index e8e9fac..0af9d24 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -1,6 +1,7 @@
mod serialize;
mod timer;
mod vint;
+mod counting_writer;
pub mod bitpacker;
pub use self::serialize::BinarySerializable;
@@ -8,6 +9,7 @@ pub use self::timer::Timing;
pub use self::timer::TimerTree;
pub use self::timer::OpenTimer;
pub use self::vint::VInt;
+pub use self::counting_writer::CountingWriter;
use std::io;
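
`counting_writer.rs` is only moved, so its body does not appear in this diff. For context, a counting writer with the `wrap` / `written_bytes` / `finish` API used later in the patch can be sketched roughly as below; the crate's actual implementation may differ in its details:

    use std::io::{self, Write};

    pub struct CountingWriter<W> {
        underlying: W,
        written_bytes: usize,
    }

    impl<W: Write> CountingWriter<W> {
        pub fn wrap(underlying: W) -> CountingWriter<W> {
            CountingWriter {
                underlying: underlying,
                written_bytes: 0,
            }
        }

        /// Number of bytes written through the wrapper so far.
        pub fn written_bytes(&self) -> usize {
            self.written_bytes
        }

        /// Flushes and hands back the underlying writer with the byte count.
        pub fn finish(mut self) -> io::Result<(W, usize)> {
            self.flush()?;
            Ok((self.underlying, self.written_bytes))
        }
    }

    impl<W: Write> Write for CountingWriter<W> {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            let written = self.underlying.write(buf)?;
            self.written_bytes += written;
            Ok(written)
        }

        fn flush(&mut self) -> io::Result<()> {
            self.underlying.flush()
        }
    }
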
diff --git a/src/common/serialize.rs b/src/common/serialize.rs
index 471ac3a..ee86247 100644
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -6,33 +6,35 @@ use std::io::Read;
use std::io;
use common::VInt;
+
+
pub trait BinarySerializable: fmt::Debug + Sized {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize>;
- fn deserialize(reader: &mut Read) -> io::Result<Self>;
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()>;
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self>;
}
impl BinarySerializable for () {
- fn serialize(&self, _: &mut Write) -> io::Result<usize> {
- Ok(0)
+ fn serialize<W: Write>(&self, _: &mut W) -> io::Result<()> {
+ Ok(())
}
- fn deserialize(_: &mut Read) -> io::Result<Self> {
+ fn deserialize<R: Read>(_: &mut R) -> io::Result<Self> {
Ok(())
}
}
impl<T: BinarySerializable> BinarySerializable for Vec<T> {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
- let mut total_size = try!(VInt(self.len() as u64).serialize(writer));
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ VInt(self.len() as u64).serialize(writer)?;
for it in self {
- total_size += try!(it.serialize(writer));
+ it.serialize(writer)?;
}
- Ok(total_size)
+ Ok(())
}
- fn deserialize(reader: &mut Read) -> io::Result<Vec<T>> {
- let num_items = try!(VInt::deserialize(reader)).val();
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Vec<T>> {
+ let num_items = VInt::deserialize(reader)?.val();
let mut items: Vec<T> = Vec::with_capacity(num_items as usize);
for _ in 0..num_items {
- let item = try!(T::deserialize(reader));
+ let item = T::deserialize(reader)?;
items.push(item);
}
Ok(items)
@@ -41,69 +43,67 @@ impl<T: BinarySerializable> BinarySerializable for Vec<T> {
impl<Left: BinarySerializable, Right: BinarySerializable> BinarySerializable for (Left, Right) {
- fn serialize(&self, write: &mut Write) -> io::Result<usize> {
- Ok(try!(self.0.serialize(write)) + try!(self.1.serialize(write)))
+ fn serialize<W: Write>(&self, write: &mut W) -> io::Result<()> {
+ self.0.serialize(write)?;
+ self.1.serialize(write)
}
- fn deserialize(reader: &mut Read) -> io::Result<Self> {
- Ok((try!(Left::deserialize(reader)), try!(Right::deserialize(reader))))
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+ Ok((Left::deserialize(reader)?, Right::deserialize(reader)?))
}
}
impl BinarySerializable for u32 {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
- writer.write_u32::<Endianness>(*self).map(|_| 4)
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ writer.write_u32::<Endianness>(*self)
}
- fn deserialize(reader: &mut Read) -> io::Result<u32> {
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<u32> {
reader.read_u32::<Endianness>()
}
}
impl BinarySerializable for u64 {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
- writer.write_u64::<Endianness>(*self).map(|_| 8)
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ writer.write_u64::<Endianness>(*self)
}
- fn deserialize(reader: &mut Read) -> io::Result<u64> {
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
reader.read_u64::<Endianness>()
}
}
impl BinarySerializable for i64 {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
- writer.write_i64::<Endianness>(*self).map(|_| 8)
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ writer.write_i64::<Endianness>(*self)
}
- fn deserialize(reader: &mut Read) -> io::Result<i64> {
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
reader.read_i64::<Endianness>()
}
}
impl BinarySerializable for u8 {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
- try!(writer.write_u8(*self));
- Ok(1)
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ writer.write_u8(*self)
}
- fn deserialize(reader: &mut Read) -> io::Result<u8> {
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<u8> {
reader.read_u8()
}
}
impl BinarySerializable for String {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
let data: &[u8] = self.as_bytes();
- let mut size = try!(VInt(data.len() as u64).serialize(writer));
- size += data.len();
- try!(writer.write_all(data));
- Ok(size)
+ VInt(data.len() as u64).serialize(writer)?;
+ writer.write_all(data)
}
- fn deserialize(reader: &mut Read) -> io::Result<String> {
- let string_length = try!(VInt::deserialize(reader)).val() as usize;
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<String> {
+ let string_length = VInt::deserialize(reader)?.val() as usize;
let mut result = String::with_capacity(string_length);
- try!(reader
- .take(string_length as u64)
- .read_to_string(&mut result));
+ reader
+ .take(string_length as u64)
+ .read_to_string(&mut result)?;
Ok(result)
}
}
@@ -117,9 +117,8 @@ mod test {
fn serialize_test<T: BinarySerializable + Eq>(v: T, num_bytes: usize) {
let mut buffer: Vec<u8> = Vec::new();
-
if num_bytes != 0 {
- assert_eq!(v.serialize(&mut buffer).unwrap(), num_bytes);
+ v.serialize(&mut buffer).unwrap();
assert_eq!(buffer.len(), num_bytes);
} else {
v.serialize(&mut buffer).unwrap();
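
Implementors outside this diff follow the same pattern under the new signatures: a generic writer or reader and a `()` result. A hypothetical two-field struct (not taken from the crate) would look like:

    use std::io::{self, Read, Write};
    use common::BinarySerializable;

    #[derive(Debug, Eq, PartialEq)]
    struct DocSpan {
        doc: u32,
        len: u32,
    }

    impl BinarySerializable for DocSpan {
        fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
            self.doc.serialize(writer)?;
            self.len.serialize(writer)
        }

        fn deserialize<R: Read>(reader: &mut R) -> io::Result<DocSpan> {
            let doc = u32::deserialize(reader)?;
            let len = u32::deserialize(reader)?;
            Ok(DocSpan { doc: doc, len: len })
        }
    }
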
diff --git a/src/common/vint.rs b/src/common/vint.rs
index 0563d8f..39653e8 100644
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -16,27 +16,25 @@ impl VInt {
}
impl BinarySerializable for VInt {
- fn serialize(&self, writer: &mut Write) -> io::Result<usize> {
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
let mut remaining = self.0;
- let mut written: usize = 0;
let mut buffer = [0u8; 10];
+ let mut i = 0;
loop {
let next_byte: u8 = (remaining % 128u64) as u8;
remaining /= 128u64;
if remaining == 0u64 {
- buffer[written] = next_byte | 128u8;
- written += 1;
- break;
+ buffer[i] = next_byte | 128u8;
+ return writer.write_all(&buffer[0..i + 1]);
} else {
- buffer[written] = next_byte;
- written += 1;
+ buffer[i] = next_byte;
}
+ i += 1;
}
- try!(writer.write_all(&buffer[0..written]));
- Ok(written)
+
}
- fn deserialize(reader: &mut Read) -> io::Result<Self> {
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
let mut bytes = reader.bytes();
let mut result = 0u64;
let mut shift = 0u64;
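
Note that this encoding marks the last byte with the high bit, the reverse of LEB128's continuation convention. A worked example (a sketch, not a test from the patch):

    // VInt(300): 300 = 2 * 128 + 44
    //   byte 0: 44        (0x2C, high bit clear -> more bytes follow)
    //   byte 1: 2 | 128   (0x82, high bit set   -> final byte)
    let mut buffer: Vec<u8> = Vec::new();
    VInt(300).serialize(&mut buffer).unwrap();
    assert_eq!(buffer, vec![0x2Cu8, 0x82u8]);
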
diff --git a/src/datastruct/skip/skiplist_builder.rs b/src/datastruct/skip/skiplist_builder.rs
index 34c5d8a..eaa439d 100644
--- a/src/datastruct/skip/skiplist_builder.rs
+++ b/src/datastruct/skip/skiplist_builder.rs
@@ -18,7 +18,7 @@ impl<T: BinarySerializable> LayerBuilder<T> {
}
fn write(&self, output: &mut Write) -> Result<(), io::Error> {
- try!(output.write_all(&self.buffer));
+ output.write_all(&self.buffer)?;
Ok(())
}
@@ -36,8 +36,8 @@ impl<T: BinarySerializable> LayerBuilder<T> {
self.remaining -= 1;
self.len += 1;
let offset = self.written_size() as u32;
- try!(doc_id.serialize(&mut self.buffer));
- try!(value.serialize(&mut self.buffer));
+ doc_id.serialize(&mut self.buffer)?;
+ value.serialize(&mut self.buffer)?;
Ok(if self.remaining == 0 {
self.remaining = self.period;
Some((doc_id, offset))
@@ -89,7 +89,7 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
}
}
- pub fn write<W: Write>(self, output: &mut Write) -> io::Result<()> {
+ pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
let mut size: u32 = 0;
let mut layer_sizes: Vec<u32> = Vec::new();
size += self.data_layer.buffer.len() as u32;
@@ -98,10 +98,10 @@ impl<T: BinarySerializable> SkipListBuilder<T> {
size += layer.buffer.len() as u32;
layer_sizes.push(size);
}
- try!(layer_sizes.serialize(output));
- try!(self.data_layer.write(output));
+ layer_sizes.serialize(output)?;
+ self.data_layer.write(output)?;
for layer in self.skip_layers.iter().rev() {
- try!(layer.write(output));
+ layer.write(output)?;
}
Ok(())
}
diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs
index ee1fc1c..7c01d0a 100644
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -12,6 +12,7 @@ use fastfield::FastFieldsWriter;
use common::bitpacker::compute_num_bits;
use common::bitpacker::BitUnpacker;
use schema::FieldType;
+use error::ResultExt;
use common;
use owning_ref::OwningRef;
@@ -125,9 +126,20 @@ impl From<Vec<u64>> for U64FastFieldReader {
fast_field_writers.serialize(&mut serializer).unwrap();
serializer.close().unwrap();
}
- let source = directory.open_read(path).unwrap();
- let fast_field_readers = FastFieldsReader::from_source(source).unwrap();
- fast_field_readers.open_reader(field).unwrap()
+ directory
+ .open_read(path)
+ .chain_err(|| "Failed to open the file")
+ .and_then(|source| {
+ FastFieldsReader::from_source(source)
+ .chain_err(|| "Failed to read the file.")
+ })
+ .and_then(|ff_readers| {
+ ff_readers
+ .open_reader(field)
+ .ok_or_else(|| "Failed to find the requested field".into())
+ })
+ .expect("This should never happen, please report.")
+
}
}
diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs
index 7f97b3b..ef6ffed 100644
--- a/src/fastfield/serializer.rs
+++ b/src/fastfield/serializer.rs
@@ -2,9 +2,9 @@ use common::BinarySerializable;
use directory::WritePtr;
use schema::Field;
use common::bitpacker::{compute_num_bits, BitPacker};
+use common::CountingWriter;
use std::io::{self, Write, Seek, SeekFrom};
-
/// `FastFieldSerializer` is in charge of serializing
/// fastfields on disk.
///
@@ -26,8 +26,7 @@ use std::io::{self, Write, Seek, SeekFrom};
/// * `close_field()`
/// * `close()`
pub struct FastFieldSerializer {
- write: WritePtr,
- written_size: usize,
+ write: CountingWriter<WritePtr>,
fields: Vec<(Field, u32)>,
min_value: u64,
field_open: bool,
@@ -37,12 +36,12 @@ pub struct FastFieldSerializer {
impl FastFieldSerializer {
/// Constructor
- pub fn new(mut write: WritePtr) -> io::Result<FastFieldSerializer> {
+ pub fn new(write: WritePtr) -> io::Result<FastFieldSerializer> {
// just making room for the pointer to header.
- let written_size: usize = try!(0u32.serialize(&mut write));
+ let mut counting_writer = CountingWriter::wrap(write);
+ 0u32.serialize(&mut counting_writer)?;
Ok(FastFieldSerializer {
- write: write,
- written_size: written_size,
+ write: counting_writer,
fields: Vec::new(),
min_value: 0,
field_open: false,
@@ -61,11 +60,11 @@ impl FastFieldSerializer {
}
self.min_value = min_value;
self.field_open = true;
- self.fields.push((field, self.written_size as u32));
- let write: &mut Write = &mut self.write;
- self.written_size += try!(min_value.serialize(write));
+ self.fields.push((field, self.write.written_bytes() as u32));
+ let write = &mut self.write;
+ min_value.serialize(write)?;
let amplitude = max_value - min_value;
- self.written_size += try!(amplitude.serialize(write));
+ amplitude.serialize(write)?;
let num_bits = compute_num_bits(amplitude);
self.bit_packer = BitPacker::new(num_bits as usize);
Ok(())
@@ -88,7 +87,7 @@ impl FastFieldSerializer {
// adding some padding to make sure we
// can read the last elements with our u64
// cursor
- self.written_size += self.bit_packer.close(&mut self.write)?;
+ self.bit_packer.close(&mut self.write)?;
Ok(())
}
@@ -96,15 +95,16 @@ impl FastFieldSerializer {
/// Closes the serializer
///
/// After this call the data must be persistently saved on disk.
- pub fn close(mut self) -> io::Result<usize> {
+ pub fn close(self) -> io::Result<usize> {
if self.field_open {
return Err(io::Error::new(io::ErrorKind::Other, "Last field not closed"));
}
- let header_offset: usize = self.written_size;
- self.written_size += try!(self.fields.serialize(&mut self.write));
- try!(self.write.seek(SeekFrom::Start(0)));
- try!((header_offset as u32).serialize(&mut self.write));
- try!(self.write.flush());
- Ok(self.written_size)
+ let header_offset: usize = self.write.written_bytes() as usize;
+ let (mut write, written_size) = self.write.finish()?;
+ self.fields.serialize(&mut write)?;
+ write.seek(SeekFrom::Start(0))?;
+ (header_offset as u32).serialize(&mut write)?;
+ write.flush()?;
+ Ok(written_size)
}
}
diff --git a/src/postings/docset.rs b/src/postings/docset.rs
index e28319f..ea4211a 100644
--- a/src/postings/docset.rs
+++ b/src/postings/docset.rs
@@ -65,6 +65,10 @@ pub trait DocSet {
None
}
}
+
+ /// Returns a best-effort hint of the
+ /// length of the docset.
+ fn size_hint(&self) -> usize;
}
@@ -83,6 +87,11 @@ impl<TDocSet: DocSet + ?Sized> DocSet for Box<TDocSet> {
let unboxed: &TDocSet = self.borrow();
unboxed.doc()
}
+
+ fn size_hint(&self) -> usize {
+ let unboxed: &TDocSet = self.borrow();
+ unboxed.size_hint()
+ }
}
impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
@@ -100,4 +109,9 @@ impl<'a, TDocSet: DocSet> DocSet for &'a mut TDocSet {
let unref: &TDocSet = *self;
unref.doc()
}
+
+ fn size_hint(&self) -> usize {
+ let unref: &TDocSet = *self;
+ unref.size_hint()
+ }
}
diff --git a/src/postings/intersection.rs b/src/postings/intersection.rs
index e4e4c23..06bc0b9 100644
--- a/src/postings/intersection.rs
+++ b/src/postings/intersection.rs
@@ -10,12 +10,13 @@ pub struct IntersectionDocSet<TDocSet: DocSet> {
}
impl<TDocSet: DocSet> From<Vec<TDocSet>> for IntersectionDocSet<TDocSet> {
- fn from(docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
+ fn from(mut docsets: Vec<TDocSet>) -> IntersectionDocSet<TDocSet> {
assert!(docsets.len() >= 2);
+ docsets.sort_by_key(|docset| docset.size_hint());
IntersectionDocSet {
docsets: docsets,
finished: false,
- doc: DocId::max_value(),
+ doc: 0u32,
}
}
}
@@ -31,37 +32,51 @@ impl<TDocSet: DocSet> IntersectionDocSet<TDocSet> {
impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
+ fn size_hint(&self) -> usize {
+ self.docsets
+ .iter()
+ .map(|docset| docset.size_hint())
+ .min()
+ .unwrap() // safe as docsets cannot be empty.
+ }
+
+ #[allow(never_loop)]
fn advance(&mut self) -> bool {
if self.finished {
return false;
}
- let num_docsets = self.docsets.len();
- let mut count_matching = 0;
- let mut doc_candidate = 0;
- let mut ord = 0;
- loop {
- let mut doc_set = &mut self.docsets[ord];
- match doc_set.skip_next(doc_candidate) {
- SkipResult::Reached => {
- count_matching += 1;
- if count_matching == num_docsets {
- self.doc = doc_candidate;
- return true;
+
+ let mut candidate_doc = self.doc;
+ let mut candidate_ord = self.docsets.len();
+
+ 'outer: loop {
+
+ for (ord, docset) in self.docsets.iter_mut().enumerate() {
+ if ord != candidate_ord {
+ // `candidate_ord` is already at the
+ // right position.
+ //
+ // Calling `skip_next` would advance this docset
+ // and miss it.
+ match docset.skip_next(candidate_doc) {
+ SkipResult::Reached => {}
+ SkipResult::OverStep => {
+ // this is not in the intersection,
+ // let's update our candidate.
+ candidate_doc = docset.doc();
+ candidate_ord = ord;
+ continue 'outer;
+ }
+ SkipResult::End => {
+ self.finished = true;
+ return false;
+ }
}
}
- SkipResult::End => {
- self.finished = true;
- return false;
- }
- SkipResult::OverStep => {
- count_matching = 1;
- doc_candidate = doc_set.doc();
- }
- }
- ord += 1;
- if ord == num_docsets {
- ord = 0;
}
+
+ self.doc = candidate_doc;
+ return true;
}
}
@@ -69,3 +84,51 @@ impl<TDocSet: DocSet> DocSet for IntersectionDocSet<TDocSet> {
self.doc
}
}
+
+
+#[cfg(test)]
+mod tests {
+
+ use postings::{DocSet, VecPostings, IntersectionDocSet};
+
+ #[test]
+ fn test_intersection() {
+ {
+ let left = VecPostings::from(vec![1, 3, 9]);
+ let right = VecPostings::from(vec![3, 4, 9, 18]);
+ let mut intersection = IntersectionDocSet::from(vec![left, right]);
+ assert!(intersection.advance());
+ assert_eq!(intersection.doc(), 3);
+ assert!(intersection.advance());
+ assert_eq!(intersection.doc(), 9);
+ assert!(!intersection.advance());
+ }
+ {
+ let a = VecPostings::from(vec![1, 3, 9]);
+ let b = VecPostings::from(vec![3, 4, 9, 18]);
+ let c = VecPostings::from(vec![1, 5, 9, 111]);
+ let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
+ assert!(intersection.advance());
+ assert_eq!(intersection.doc(), 9);
+ assert!(!intersection.advance());
+ }
+ }
+
+ #[test]
+ fn test_intersection_zero() {
+ let left = VecPostings::from(vec![0]);
+ let right = VecPostings::from(vec![0]);
+ let mut intersection = IntersectionDocSet::from(vec![left, right]);
+ assert!(intersection.advance());
+ assert_eq!(intersection.doc(), 0);
+ }
+
+ #[test]
+ fn test_intersection_empty() {
+ let a = VecPostings::from(vec![1, 3]);
+ let b = VecPostings::from(vec![1, 4]);
+ let c = VecPostings::from(vec![3, 9]);
+ let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
+ assert!(!intersection.advance());
+ }
+}
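
Sorting the docsets by `size_hint` in `from` means the sparsest set is consulted first, so it tends to supply the candidate documents while the denser sets are merely probed with `skip_next`. A usage sketch, assuming `VecPostings::size_hint` reports its length (as `SegmentPostings` does later in this diff):

    let rare = VecPostings::from(vec![5, 500_000]);
    let dense = VecPostings::from((0..1_000_000).collect::<Vec<u32>>());
    // After the sort on `size_hint`, `rare` is positioned first and drives the candidates.
    let mut intersection = IntersectionDocSet::from(vec![dense, rare]);
    assert!(intersection.advance());
    assert_eq!(intersection.doc(), 5);
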
diff --git a/src/postings/mod.rs b/src/postings/mod.rs
index 647a7ae..5de1f7a 100644
--- a/src/postings/mod.rs
+++ b/src/postings/mod.rs
@@ -373,29 +373,6 @@ mod tests {
}
}
- #[test]
- fn test_intersection() {
- {
- let left = VecPostings::from(vec![1, 3, 9]);
- let right = VecPostings::from(vec![3, 4, 9, 18]);
- let mut intersection = IntersectionDocSet::from(vec![left, right]);
- assert!(intersection.advance());
- assert_eq!(intersection.doc(), 3);
- assert!(intersection.advance());
- assert_eq!(intersection.doc(), 9);
- assert!(!intersection.advance());
- }
- {
- let a = VecPostings::from(vec![1, 3, 9]);
- let b = VecPostings::from(vec![3, 4, 9, 18]);
- let c = VecPostings::from(vec![1, 5, 9, 111]);
- let mut intersection = IntersectionDocSet::from(vec![a, b, c]);
- assert!(intersection.advance());
- assert_eq!(intersection.doc(), 9);
- assert!(!intersection.advance());
- }
- }
-
lazy_static! {
static ref TERM_A: Term = {
@@ -406,6 +383,14 @@ mod tests {
let field = Field(0);
Term::from_field_text(field, "b")
};
+ static ref TERM_C: Term = {
+ let field = Field(0);
+ Term::from_field_text(field, "c")
+ };
+ static ref TERM_D: Term = {
+ let field = Field(0);
+ Term::from_field_text(field, "d")
+ };
static ref INDEX: Index = {
let mut schema_builder = SchemaBuilder::default();
let text_field = schema_builder.add_text_field("text", STRING);
@@ -415,25 +400,23 @@ mod tests {
let mut rng: XorShiftRng = XorShiftRng::from_seed(*seed);
let index = Index::create_in_ram(schema);
- let mut count_a = 0;
- let mut count_b = 0;
- let posting_list_size = 100_000;
+ let posting_list_size = 1_000_000;
{
let mut index_writer = index.writer_with_num_threads(1, 40_000_000).unwrap();
- for _ in 0 .. {
- if count_a >= posting_list_size &&
- count_b >= posting_list_size {
- break;
- }
+ for _ in 0 .. posting_list_size {
let mut doc = Document::default();
- if count_a < posting_list_size && rng.gen_weighted_bool(15) {
- count_a += 1;
+ if rng.gen_weighted_bool(15) {
doc.add_text(text_field, "a");
}
- if count_b < posting_list_size && rng.gen_weighted_bool(10) {
- count_b += 1;
+ if rng.gen_weighted_bool(10) {
doc.add_text(text_field, "b");
}
+ if rng.gen_weighted_bool(5) {
+ doc.add_text(text_field, "c");
+ }
+ if rng.gen_weighted_bool(1) {
+ doc.add_text(text_field, "d");
+ }
index_writer.add_document(doc);
}
assert!(index_writer.commit().is_ok());
@@ -467,8 +450,16 @@ mod tests {
let segment_postings_b = segment_reader
.read_postings(&*TERM_B, SegmentPostingsOption::NoFreq)
.unwrap();
+ let segment_postings_c = segment_reader
+ .read_postings(&*TERM_C, SegmentPostingsOption::NoFreq)
+ .unwrap();
+ let segment_postings_d = segment_reader
+ .read_postings(&*TERM_D, SegmentPostingsOption::NoFreq)
+ .unwrap();
let mut intersection = IntersectionDocSet::from(vec![segment_postings_a,
- segment_postings_b]);
+ segment_postings_b,
+ segment_postings_c,
+ segment_postings_d]);
while intersection.advance() {}
});
}
diff --git a/src/postings/segment_postings.rs b/src/postings/segment_postings.rs
index d6386d1..f429226 100644
--- a/src/postings/segment_postings.rs
+++ b/src/postings/segment_postings.rs
@@ -152,6 +152,10 @@ impl<'a> DocSet for SegmentPostings<'a> {
}
}
+ fn size_hint(&self) -> usize {
+ self.len()
+ }
+
#[inline]
fn doc(&self) -> DocId {
let docs = self.block_cursor.docs();
diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs
index d5e72fa..4a3078a 100644
--- a/src/postings/serializer.rs
+++ b/src/postings/serializer.rs
@@ -10,12 +10,11 @@ use directory::WritePtr;
use compression::{NUM_DOCS_PER_BLOCK, BlockEncoder, CompositeEncoder};
use DocId;
use core::Segment;
-use std::io;
-use core::SegmentComponent;
-use std::io::Write;
+use std::io::{self, Write};
use compression::VIntEncoder;
use common::VInt;
use common::BinarySerializable;
+use common::CountingWriter;
use termdict::TermDictiona