summaryrefslogtreecommitdiffstats
path: root/src/store
diff options
context:
space:
mode:
authorPaul Masurel <paul.masurel@gmail.com>2018-06-04 09:39:18 +0900
committerGitHub <noreply@github.com>2018-06-04 09:39:18 +0900
commitb59132966faf9b179ad942b94c9ac3e7d90b505e (patch)
tree8a5d1fe0a9c66f504881d0f8f7bbb7c6e097a9c8 /src/store
parent863d3411bc8609a689a691e4f8d1303ec791ee2a (diff)
Better heap (#311)
* Changed the heap to a paged memory arena. * Trying to simplify the indexing term hashmap * Exploding datastruct * Removed some complexity in bitpacker
Diffstat (limited to 'src/store')
-rw-r--r--src/store/mod.rs1
-rw-r--r--src/store/reader.rs2
-rw-r--r--src/store/skiplist/mod.rs169
-rw-r--r--src/store/skiplist/skiplist.rs133
-rw-r--r--src/store/skiplist/skiplist_builder.rs99
-rw-r--r--src/store/writer.rs2
6 files changed, 404 insertions, 2 deletions
diff --git a/src/store/mod.rs b/src/store/mod.rs
index 1eba49a..14f023a 100644
--- a/src/store/mod.rs
+++ b/src/store/mod.rs
@@ -33,6 +33,7 @@ and should rely on either
!*/
+mod skiplist;
mod reader;
mod writer;
pub use self::reader::StoreReader;
diff --git a/src/store/reader.rs b/src/store/reader.rs
index da69739..69e9ea1 100644
--- a/src/store/reader.rs
+++ b/src/store/reader.rs
@@ -2,7 +2,7 @@ use Result;
use common::BinarySerializable;
use common::VInt;
-use datastruct::SkipList;
+use super::skiplist::SkipList;
use directory::ReadOnlySource;
use lz4;
use schema::Document;
diff --git a/src/store/skiplist/mod.rs b/src/store/skiplist/mod.rs
new file mode 100644
index 0000000..bed5f61
--- /dev/null
+++ b/src/store/skiplist/mod.rs
@@ -0,0 +1,169 @@
+#![allow(dead_code)]
+
+mod skiplist;
+mod skiplist_builder;
+
+pub use self::skiplist::SkipList;
+pub use self::skiplist_builder::SkipListBuilder;
+
+#[cfg(test)]
+mod tests {
+
+ use super::{SkipList, SkipListBuilder};
+
+ #[test]
+ fn test_skiplist() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8);
+ skip_list_builder.insert(2, &3).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next(), Some((2, 3)));
+ }
+
+ #[test]
+ fn test_skiplist2() {
+ let mut output: Vec<u8> = Vec::new();
+ let skip_list_builder: SkipListBuilder<u32> = SkipListBuilder::new(8);
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<u32> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist3() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2);
+ skip_list_builder.insert(2, &()).unwrap();
+ skip_list_builder.insert(3, &()).unwrap();
+ skip_list_builder.insert(5, &()).unwrap();
+ skip_list_builder.insert(7, &()).unwrap();
+ skip_list_builder.insert(9, &()).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<()> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next().unwrap(), (2, ()));
+ assert_eq!(skip_list.next().unwrap(), (3, ()));
+ assert_eq!(skip_list.next().unwrap(), (5, ()));
+ assert_eq!(skip_list.next().unwrap(), (7, ()));
+ assert_eq!(skip_list.next().unwrap(), (9, ()));
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist4() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2);
+ skip_list_builder.insert(2, &()).unwrap();
+ skip_list_builder.insert(3, &()).unwrap();
+ skip_list_builder.insert(5, &()).unwrap();
+ skip_list_builder.insert(7, &()).unwrap();
+ skip_list_builder.insert(9, &()).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<()> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next().unwrap(), (2, ()));
+ skip_list.seek(5);
+ assert_eq!(skip_list.next().unwrap(), (5, ()));
+ assert_eq!(skip_list.next().unwrap(), (7, ()));
+ assert_eq!(skip_list.next().unwrap(), (9, ()));
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist5() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+ skip_list_builder.insert(2, &()).unwrap();
+ skip_list_builder.insert(3, &()).unwrap();
+ skip_list_builder.insert(5, &()).unwrap();
+ skip_list_builder.insert(6, &()).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<()> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next().unwrap(), (2, ()));
+ skip_list.seek(6);
+ assert_eq!(skip_list.next().unwrap(), (6, ()));
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist6() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(2);
+ skip_list_builder.insert(2, &()).unwrap();
+ skip_list_builder.insert(3, &()).unwrap();
+ skip_list_builder.insert(5, &()).unwrap();
+ skip_list_builder.insert(7, &()).unwrap();
+ skip_list_builder.insert(9, &()).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<()> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next().unwrap(), (2, ()));
+ skip_list.seek(10);
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist7() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+ for i in 0..1000 {
+ skip_list_builder.insert(i, &()).unwrap();
+ }
+ skip_list_builder.insert(1004, &()).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ let mut skip_list: SkipList<()> = SkipList::from(output.as_slice());
+ assert_eq!(skip_list.next().unwrap(), (0, ()));
+ skip_list.seek(431);
+ assert_eq!(skip_list.next().unwrap(), (431, ()));
+ skip_list.seek(1003);
+ assert_eq!(skip_list.next().unwrap(), (1004, ()));
+ assert_eq!(skip_list.next(), None);
+ }
+
+ #[test]
+ fn test_skiplist8() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(8);
+ skip_list_builder.insert(2, &3).unwrap();
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ assert_eq!(output.len(), 11);
+ assert_eq!(output[0], 1u8 + 128u8);
+ }
+
+ #[test]
+ fn test_skiplist9() {
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<u64> = SkipListBuilder::new(4);
+ for i in 0..4 * 4 * 4 {
+ skip_list_builder.insert(i, &i).unwrap();
+ }
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ assert_eq!(output.len(), 774);
+ assert_eq!(output[0], 4u8 + 128u8);
+ }
+
+ #[test]
+ fn test_skiplist10() {
+ // checking that void gets serialized to nothing.
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+ for i in 0..((4 * 4 * 4) - 1) {
+ skip_list_builder.insert(i, &()).unwrap();
+ }
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ assert_eq!(output.len(), 230);
+ assert_eq!(output[0], 128u8 + 3u8);
+ }
+
+ #[test]
+ fn test_skiplist11() {
+ // checking that void gets serialized to nothing.
+ let mut output: Vec<u8> = Vec::new();
+ let mut skip_list_builder: SkipListBuilder<()> = SkipListBuilder::new(4);
+ for i in 0..(4 * 4) {
+ skip_list_builder.insert(i, &()).unwrap();
+ }
+ skip_list_builder.write::<Vec<u8>>(&mut output).unwrap();
+ assert_eq!(output.len(), 65);
+ assert_eq!(output[0], 128u8 + 3u8);
+ }
+
+}
diff --git a/src/store/skiplist/skiplist.rs b/src/store/skiplist/skiplist.rs
new file mode 100644
index 0000000..d7e8ed6
--- /dev/null
+++ b/src/store/skiplist/skiplist.rs
@@ -0,0 +1,133 @@
+use common::{BinarySerializable, VInt};
+use std::cmp::max;
+use std::marker::PhantomData;
+
+static EMPTY: [u8; 0] = [];
+
+struct Layer<'a, T> {
+ data: &'a [u8],
+ cursor: &'a [u8],
+ next_id: Option<u64>,
+ _phantom_: PhantomData<T>,
+}
+
+impl<'a, T: BinarySerializable> Iterator for Layer<'a, T> {
+ type Item = (u64, T);
+
+ fn next(&mut self) -> Option<(u64, T)> {
+ if let Some(cur_id) = self.next_id {
+ let cur_val = T::deserialize(&mut self.cursor).unwrap();
+ self.next_id = VInt::deserialize_u64(&mut self.cursor).ok();
+ Some((cur_id, cur_val))
+ } else {
+ None
+ }
+ }
+}
+
+impl<'a, T: BinarySerializable> From<&'a [u8]> for Layer<'a, T> {
+ fn from(data: &'a [u8]) -> Layer<'a, T> {
+ let mut cursor = data;
+ let next_id = VInt::deserialize_u64(&mut cursor).ok();
+ Layer {
+ data,
+ cursor,
+ next_id,
+ _phantom_: PhantomData,
+ }
+ }
+}
+
+impl<'a, T: BinarySerializable> Layer<'a, T> {
+ fn empty() -> Layer<'a, T> {
+ Layer {
+ data: &EMPTY,
+ cursor: &EMPTY,
+ next_id: None,
+ _phantom_: PhantomData,
+ }
+ }
+
+ fn seek_offset(&mut self, offset: usize) {
+ self.cursor = &self.data[offset..];
+ self.next_id = VInt::deserialize_u64(&mut self.cursor).ok();
+ }
+
+ // Returns the last element (key, val)
+ // such that (key < doc_id)
+ //
+ // If there is no such element anymore,
+ // returns None.
+ //
+ // If the element exists, it will be returned
+ // at the next call to `.next()`.
+ fn seek(&mut self, key: u64) -> Option<(u64, T)> {
+ let mut result: Option<(u64, T)> = None;
+ loop {
+ if let Some(next_id) = self.next_id {
+ if next_id < key {
+ if let Some(v) = self.next() {
+ result = Some(v);
+ continue;
+ }
+ }
+ }
+ return result;
+ }
+ }
+}
+
+pub struct SkipList<'a, T: BinarySerializable> {
+ data_layer: Layer<'a, T>,
+ skip_layers: Vec<Layer<'a, u64>>,
+}
+
+impl<'a, T: BinarySerializable> Iterator for SkipList<'a, T> {
+ type Item = (u64, T);
+
+ fn next(&mut self) -> Option<(u64, T)> {
+ self.data_layer.next()
+ }
+}
+
+impl<'a, T: BinarySerializable> SkipList<'a, T> {
+ pub fn seek(&mut self, key: u64) -> Option<(u64, T)> {
+ let mut next_layer_skip: Option<(u64, u64)> = None;
+ for skip_layer in &mut self.skip_layers {
+ if let Some((_, offset)) = next_layer_skip {
+ skip_layer.seek_offset(offset as usize);
+ }
+ next_layer_skip = skip_layer.seek(key);
+ }
+ if let Some((_, offset)) = next_layer_skip {
+ self.data_layer.seek_offset(offset as usize);
+ }
+ self.data_layer.seek(key)
+ }
+}
+
+impl<'a, T: BinarySerializable> From<&'a [u8]> for SkipList<'a, T> {
+ fn from(mut data: &'a [u8]) -> SkipList<'a, T> {
+ let offsets: Vec<u64> = Vec::<VInt>::deserialize(&mut data)
+ .unwrap()
+ .into_iter()
+ .map(|el| el.0)
+ .collect();
+ let num_layers = offsets.len();
+ let layers_data: &[u8] = data;
+ let data_layer: Layer<'a, T> = if num_layers == 0 {
+ Layer::empty()
+ } else {
+ let first_layer_data: &[u8] = &layers_data[..offsets[0] as usize];
+ Layer::from(first_layer_data)
+ };
+ let skip_layers = (0..max(1, num_layers) - 1)
+ .map(|i| (offsets[i] as usize, offsets[i + 1] as usize))
+ .map(|(start, stop)| Layer::from(&layers_data[start..stop]))
+ .collect();
+ SkipList {
+ skip_layers,
+ data_layer,
+ }
+ }
+}
diff --git a/src/store/skiplist/skiplist_builder.rs b/src/store/skiplist/skiplist_builder.rs
new file mode 100644
index 0000000..6a698a2
--- /dev/null
+++ b/src/store/skiplist/skiplist_builder.rs
@@ -0,0 +1,99 @@
+use common::{is_power_of_2, BinarySerializable, VInt};
+use std::io;
+use std::io::Write;
+use std::marker::PhantomData;
+
+struct LayerBuilder<T: BinarySerializable> {
+ period_mask: usize,
+ buffer: Vec<u8>,
+ len: usize,
+ _phantom_: PhantomData<T>,
+}
+
+impl<T: BinarySerializable> LayerBuilder<T> {
+ fn written_size(&self) -> usize {
+ self.buffer.len()
+ }
+
+ fn write(&self, output: &mut Write) -> Result<(), io::Error> {
+ output.write_all(&self.buffer)?;
+ Ok(())
+ }
+
+ fn with_period(period: usize) -> LayerBuilder<T> {
+ assert!(is_power_of_2(period), "The period has to be a power of 2.");
+ LayerBuilder {
+ period_mask: (period - 1),
+ buffer: Vec::new(),
+ len: 0,
+ _phantom_: PhantomData,
+ }
+ }
+
+ fn insert(&mut self, key: u64, value: &T) -> io::Result<Option<(u64, u64)>> {
+ self.len += 1;
+ let offset = self.written_size() as u64;
+ VInt(key).serialize(&mut self.buffer)?;
+ value.serialize(&mut self.buffer)?;
+ let emit_skip_info = (self.period_mask & self.len) == 0;
+ if emit_skip_info {
+ Ok(Some((key, offset)))
+ } else {
+ Ok(None)
+ }
+ }
+}
+
+pub struct SkipListBuilder<T: BinarySerializable> {
+ period: usize,
+ data_layer: LayerBuilder<T>,
+ skip_layers: Vec<LayerBuilder<u64>>,
+}
+
+impl<T: BinarySerializable> SkipListBuilder<T> {
+ pub fn new(period: usize) -> SkipListBuilder<T> {
+ SkipListBuilder {
+ period,
+ data_layer: LayerBuilder::with_period(period),
+ skip_layers: Vec::new(),
+ }
+ }
+
+ fn get_skip_layer(&mut self, layer_id: usize) -> &mut LayerBuilder<u64> {
+ if layer_id == self.skip_layers.len() {
+ let layer_builder = LayerBuilder::with_period(self.period);
+ self.skip_layers.push(layer_builder);
+ }
+ &mut self.skip_layers[layer_id]
+ }
+
+ pub fn insert(&mut self, key: u64, dest: &T) -> io::Result<()> {
+ let mut layer_id = 0;
+ let mut skip_pointer = self.data_layer.insert(key, dest)?;
+ loop {
+ skip_pointer = match skip_pointer {
+ Some((skip_doc_id, skip_offset)) => self.get_skip_layer(layer_id)
+ .insert(skip_doc_id, &skip_offset)?,
+ None => {
+ return Ok(());
+ }
+ };
+ layer_id += 1;
+ }
+ }
+
+ pub fn write<W: Write>(self, output: &mut W) -> io::Result<()> {
+ let mut size: u64 = self.data_layer.buffer.len() as u64;
+ let mut layer_sizes = vec![VInt(size)];
+ for layer in self.skip_layers.iter().rev() {
+ size += layer.buffer.len() as u64;
+ layer_sizes.push(VInt(size));
+ }
+ layer_sizes.serialize(output)?;
+ self.data_layer.write(output)?;
+ for layer in self.skip_layers.iter().rev() {
+ layer.write(output)?;
+ }
+ Ok(())
+ }
+}
diff --git a/src/store/writer.rs b/src/store/writer.rs
index 052a5c0..7e002e4 100644
--- a/src/store/writer.rs
+++ b/src/store/writer.rs
@@ -1,7 +1,7 @@
use super::StoreReader;
use common::CountingWriter;
use common::{BinarySerializable, VInt};
-use datastruct::SkipListBuilder;
+use super::skiplist::SkipListBuilder;
use directory::WritePtr;
use lz4;
use schema::Document;