diff options
author | Paul Masurel <paul.masurel@gmail.com> | 2018-06-04 09:39:18 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-04 09:39:18 +0900 |
commit | b59132966faf9b179ad942b94c9ac3e7d90b505e (patch) | |
tree | 8a5d1fe0a9c66f504881d0f8f7bbb7c6e097a9c8 /src/store | |
parent | 863d3411bc8609a689a691e4f8d1303ec791ee2a (diff) |
Better heap (#311)
* Changed the heap to a paged memory arena.
* Trying to simplify the indexing term hashmap
* Exploding datastruct
* Removed some complexity in bitpacker
Diffstat (limited to 'src/store')
-rw-r--r-- | src/store/mod.rs | 1 | ||||
-rw-r--r-- | src/store/reader.rs | 2 | ||||
-rw-r--r-- | src/store/skiplist/mod.rs | 169 | ||||
-rw-r--r-- | src/store/skiplist/skiplist.rs | 133 | ||||
-rw-r--r-- | src/store/skiplist/skiplist_builder.rs | 99 | ||||
-rw-r--r-- | src/store/writer.rs | 2 |
6 files changed, 404 insertions, 2 deletions
diff --git a/src/store/mod.rs b/src/store/mod.rs index 1eba49a..14f023a 100644 --- a/src/store/mod.rs +++ b/src/store/mod.rs @@ -33,6 +33,7 @@ and should rely on either !*/ +mod skiplist; mod reader; mod writer; pub use self::reader::StoreReader; diff --git a/src/store/reader.rs b/src/store/reader.rs index da69739..69e9ea1 100644 --- a/src/store/reader.rs +++ b/src/store/reader.rs @@ -2,7 +2,7 @@ use Result; use common::BinarySerializable; use common::VInt; -use datastruct::SkipList; +use super::skiplist::SkipList; use directory::ReadOnlySource; use lz4; use schema::Document; diff --git a/src/store/skiplist/mod.rs b/src/store/skiplist/mod.rs new file mode 100644 index 0000000..bed5f61 --- /dev/null +++ b/src/store/skiplist/mod.rs @@ -0,0 +1,169 @@ +#![allow(dead_code)] + +mod skiplist; +mod skiplist_builder; + +pub use self::skiplist::SkipList; +pub use self::skiplist_builder::SkipListBuilder; + +#[cfg(test)] +mod tests { + + use super::{SkipList, SkipListBuilder}; + + #[test] + fn test_skiplist() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8); + skip_list_builder.insert(2, &3).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next(), Some((2, 3))); + } + + #[test] + fn test_skiplist2() { + let mut output: Vec<u8> = Vec::new(); + let skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist3() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()).unwrap(); + skip_list_builder.insert(3, &()).unwrap(); + skip_list_builder.insert(5, &()).unwrap(); + skip_list_builder.insert(7, &()).unwrap(); + skip_list_builder.insert(9, &()).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<()> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next().unwrap(), (2, ())); + assert_eq!(skip_list.next().unwrap(), (3, ())); + assert_eq!(skip_list.next().unwrap(), (5, ())); + assert_eq!(skip_list.next().unwrap(), (7, ())); + assert_eq!(skip_list.next().unwrap(), (9, ())); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist4() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()).unwrap(); + skip_list_builder.insert(3, &()).unwrap(); + skip_list_builder.insert(5, &()).unwrap(); + skip_list_builder.insert(7, &()).unwrap(); + skip_list_builder.insert(9, &()).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<()> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(5); + assert_eq!(skip_list.next().unwrap(), (5, ())); + assert_eq!(skip_list.next().unwrap(), (7, ())); + assert_eq!(skip_list.next().unwrap(), (9, ())); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist5() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4); + skip_list_builder.insert(2, &()).unwrap(); + skip_list_builder.insert(3, &()).unwrap(); + skip_list_builder.insert(5, &()).unwrap(); + skip_list_builder.insert(6, &()).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<()> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(6); + assert_eq!(skip_list.next().unwrap(), (6, ())); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist6() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2); + skip_list_builder.insert(2, &()).unwrap(); + skip_list_builder.insert(3, &()).unwrap(); + skip_list_builder.insert(5, &()).unwrap(); + skip_list_builder.insert(7, &()).unwrap(); + skip_list_builder.insert(9, &()).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<()> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next().unwrap(), (2, ())); + skip_list.seek(10); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist7() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4); + for i in 0..1000 { + skip_list_builder.insert(i, &()).unwrap(); + } + skip_list_builder.insert(1004, &()).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + let mut skip_list: SkipList<()> = SkipList::from(output.as_slice()); + assert_eq!(skip_list.next().unwrap(), (0, ())); + skip_list.seek(431); + assert_eq!(skip_list.next().unwrap(), (431, ())); + skip_list.seek(1003); + assert_eq!(skip_list.next().unwrap(), (1004, ())); + assert_eq!(skip_list.next(), None); + } + + #[test] + fn test_skiplist8() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(8); + skip_list_builder.insert(2, &3).unwrap(); + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + assert_eq!(output.len(), 11); + assert_eq!(output[0], 1u8 + 128u8); + } + + #[test] + fn test_skiplist9() { + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(4); + for i in 0..4 * 4 * 4 { + skip_list_builder.insert(i, &i).unwrap(); + } + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + assert_eq!(output.len(), 774); + assert_eq!(output[0], 4u8 + 128u8); + } + + #[test] + fn test_skiplist10() { + // checking that void gets serialized to nothing. + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4); + for i in 0..((4 * 4 * 4) - 1) { + skip_list_builder.insert(i, &()).unwrap(); + } + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + assert_eq!(output.len(), 230); + assert_eq!(output[0], 128u8 + 3u8); + } + + #[test] + fn test_skiplist11() { + // checking that void gets serialized to nothing. + let mut output: Vec<u8> = Vec::new(); + let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4); + for i in 0..(4 * 4) { + skip_list_builder.insert(i, &()).unwrap(); + } + skip_list_builder.write::<Vec<u8>>(&mut output).unwrap(); + assert_eq!(output.len(), 65); + assert_eq!(output[0], 128u8 + 3u8); + } + +} diff --git a/src/store/skiplist/skiplist.rs b/src/store/skiplist/skiplist.rs new file mode 100644 index 0000000..d7e8ed6 --- /dev/null +++ b/src/store/skiplist/skiplist.rs @@ -0,0 +1,133 @@ +use common::{BinarySerializable, VInt}; +use std::cmp::max; +use std::marker::PhantomData; + +static EMPTY: [u8; 0] = []; + +struct Layer<'a, T> { + data: &'a [u8], + cursor: &'a [u8], + next_id: Option<u64>, + _phantom_: PhantomData<T>, +} + +impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> { + type Item = (u64, T); + + fn next(&mut self) -> Option<(u64, T)> { + if let Some(cur_id) = self.next_id { + let cur_val = T::deserialize(&mut self.cursor).unwrap(); + self.next_id = VInt::deserialize_u64(&mut self.cursor).ok(); + Some((cur_id, cur_val)) + } else { + None + } + } +} + +impl<'a, T: BinarySerializable> From<&'a [u8]> for Layer<'a, T> { + fn from(data: &'a [u8]) -> Layer<'a, T> { + let mut cursor = data; + let next_id = VInt::deserialize_u64(&mut cursor).ok(); + Layer { + data, + cursor, + next_id, + _phantom_: PhantomData, + } + } +} + +impl<'a, T: BinarySerializable> Layer<'a, T> { + fn empty() -> Layer<'a, T> { + Layer { + data: &EMPTY, + cursor: &EMPTY, + next_id: None, + _phantom_: PhantomData, + } + } + + fn seek_offset(&mut self, offset: usize) { + self.cursor = &self.data[offset..]; + self.next_id = VInt::deserialize_u64(&mut self.cursor).ok(); + } + + // Returns the last element (key, val) + // such that (key < doc_id) + // + // If there is no such element anymore, + // returns None. + // + // If the element exists, it will be returned + // at the next call to `.next()`. + fn seek(&mut self, key: u64) -> Option<(u64, T)> { + let mut result: Option<(u64, T)> = None; + loop { + if let Some(next_id) = self.next_id { + if next_id < key { + if let Some(v) = self.next() { + result = Some(v); + continue; + } + } + } + return result; + } + } +} + +pub struct SkipList<'a, T: BinarySerializable> { + data_layer: Layer<'a, T>, + skip_layers: Vec<Layer<'a, u64>>, +} + +impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> { + type Item = (u64, T); + + fn next(&mut self) -> Option<(u64, T)> { + self.data_layer.next() + } +} + +impl<'a, T: BinarySerializable> SkipList<'a, T> { + pub fn seek(&mut self, key: u64) -> Option<(u64, T)> { + let mut next_layer_skip: Option<(u64, u64)> = None; + for skip_layer in &mut self.skip_layers { + if let Some((_, offset)) = next_layer_skip { + skip_layer.seek_offset(offset as usize); + } + next_layer_skip = skip_layer.seek(key); + } + if let Some((_, offset)) = next_layer_skip { + self.data_layer.seek_offset(offset as usize); + } + self.data_layer.seek(key) + } +} + +impl<'a, T: BinarySerializable> From<&'a [u8]> for SkipList<'a, T> { + fn from(mut data: &'a [u8]) -> SkipList<'a, T> { + let offsets: Vec<u64> = Vec::<VInt>::deserialize(&mut data) + .unwrap() + .into_iter() + .map(|el| el.0) + .collect(); + let num_layers = offsets.len(); + let layers_data: &[u8] = data; + let data_layer: Layer<'a, T> = if num_layers == 0 { + Layer::empty() + } else { + let first_layer_data: &[u8] = &layers_data[..offsets[0] as usize]; + Layer::from(first_layer_data) + }; + let skip_layers = (0..max(1, num_layers) - 1) + .map(|i| (offsets[i] as usize, offsets[i + 1] as usize)) + .map(|(start, stop)| Layer::from(&layers_data[start..stop])) + .collect(); + SkipList { + skip_layers, + data_layer, + } + } +} diff --git a/src/store/skiplist/skiplist_builder.rs b/src/store/skiplist/skiplist_builder.rs new file mode 100644 index 0000000..6a698a2 --- /dev/null +++ b/src/store/skiplist/skiplist_builder.rs @@ -0,0 +1,99 @@ +use common::{is_power_of_2, BinarySerializable, VInt}; +use std::io; +use std::io::Write; +use std::marker::PhantomData; + +struct LayerBuilder<T: BinarySerializable> { + period_mask: usize, + buffer: Vec<u8>, + len: usize, + _phantom_: PhantomData<T>, +} + +impl<T: BinarySerializable> LayerBuilder<T> { + fn written_size(&self) -> usize { + self.buffer.len() + } + + fn write(&self, output: &mut Write) -> Result<(), io::Error> { + output.write_all(&self.buffer)?; + Ok(()) + } + + fn with_period(period: usize) -> LayerBuilder<T> { + assert!(is_power_of_2(period), "The period has to be a power of 2."); + LayerBuilder { + period_mask: (period - 1), + buffer: Vec::new(), + len: 0, + _phantom_: PhantomData, + } + } + + fn insert(&mut self, key: u64, value: &T) -> io::Result<Option<(u64, u64)>> { + self.len += 1; + let offset = self.written_size() as u64; + VInt(key).serialize(&mut self.buffer)?; + value.serialize(&mut self.buffer)?; + let emit_skip_info = (self.period_mask & self.len) == 0; + if emit_skip_info { + Ok(Some((key, offset))) + } else { + Ok(None) + } + } +} + +pub struct SkipListBuilder<T: BinarySerializable> { + period: usize, + data_layer: LayerBuilder<T>, + skip_layers: Vec<LayerBuilder<u64>>, +} + +impl<T: BinarySerializable> SkipListBuilder<T> { + pub fn new(period: usize) -> SkipListBuilder<T> { + SkipListBuilder { + period, + data_layer: LayerBuilder::with_period(period), + skip_layers: Vec::new(), + } + } + + fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder<u64> { + if layer_id == self.skip_layers.len() { + let layer_builder = LayerBuilder::with_period(self.period); + self.skip_layers.push(layer_builder); + } + &mut self.skip_layers[layer_id] + } + + pub fn insert(&mut self, key: u64, dest: &T) -> io::Result<()> { + let mut layer_id = 0; + let mut skip_pointer = self.data_layer.insert(key, dest)?; + loop { + skip_pointer = match skip_pointer { + Some((skip_doc_id, skip_offset)) => self.get_skip_layer(layer_id) + .insert(skip_doc_id, &skip_offset)?, + None => { + return Ok(()); + } + }; + layer_id += 1; + } + } + + pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> { + let mut size: u64 = self.data_layer.buffer.len() as u64; + let mut layer_sizes = vec![VInt(size)]; + for layer in self.skip_layers.iter().rev() { + size += layer.buffer.len() as u64; + layer_sizes.push(VInt(size)); + } + layer_sizes.serialize(output)?; + self.data_layer.write(output)?; + for layer in self.skip_layers.iter().rev() { + layer.write(output)?; + } + Ok(()) + } +} diff --git a/src/store/writer.rs b/src/store/writer.rs index 052a5c0..7e002e4 100644 --- a/src/store/writer.rs +++ b/src/store/writer.rs @@ -1,7 +1,7 @@ use super::StoreReader; use common::CountingWriter; use common::{BinarySerializable, VInt}; -use datastruct::SkipListBuilder; +use super::skiplist::SkipListBuilder; use directory::WritePtr; use lz4; use schema::Document; |