summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--examples/simple_search.rs8
-rw-r--r--src/collector/chained_collector.rs9
-rw-r--r--src/collector/count_collector.rs12
-rw-r--r--src/collector/facet_collector.rs19
-rw-r--r--src/collector/mod.rs32
-rw-r--r--src/collector/multi_collector.rs13
-rw-r--r--src/collector/top_collector.rs24
-rw-r--r--src/common/bitpacker.rs12
-rw-r--r--src/common/composite_file.rs27
-rw-r--r--src/common/serialize.rs6
-rw-r--r--src/common/timer.rs18
-rw-r--r--src/common/vint.rs7
-rw-r--r--src/compression/mod.rs47
-rw-r--r--src/compression/pack/compression_pack_nosimd.rs28
-rw-r--r--src/compression/pack/compression_pack_simd.rs14
-rw-r--r--src/compression/stream.rs24
-rw-r--r--src/compression/vint/compression_vint_nosimd.rs15
-rw-r--r--src/compression/vint/compression_vint_simd.rs60
-rw-r--r--src/core/index.rs46
-rw-r--r--src/core/index_meta.rs2
-rw-r--r--src/core/inverted_index_reader.rs62
-rw-r--r--src/core/pool.rs30
-rw-r--r--src/core/searcher.rs27
-rw-r--r--src/core/segment.rs18
-rw-r--r--src/core/segment_component.rs16
-rw-r--r--src/core/segment_meta.rs24
-rw-r--r--src/core/segment_reader.rs96
-rw-r--r--src/datastruct/skip/skiplist_builder.rs16
-rw-r--r--src/datastruct/stacker/hashmap.rs40
-rw-r--r--src/datastruct/stacker/heap.rs40
-rw-r--r--src/directory/error.rs16
-rw-r--r--src/directory/managed_directory.rs94
-rw-r--r--src/directory/mmap_directory.rs178
-rw-r--r--src/directory/ram_directory.rs64
-rw-r--r--src/directory/read_only_source.rs3
-rw-r--r--src/error.rs9
-rw-r--r--src/fastfield/mod.rs45
-rw-r--r--src/fastfield/reader.rs30
-rw-r--r--src/fastfield/serializer.rs35
-rw-r--r--src/fastfield/writer.rs12
-rw-r--r--src/indexer/delete_queue.rs36
-rw-r--r--src/indexer/doc_opstamp_mapping.rs6
-rw-r--r--src/indexer/index_writer.rs182
-rw-r--r--src/indexer/log_merge_policy.rs50
-rw-r--r--src/indexer/merger.rs406
-rw-r--r--src/indexer/segment_entry.rs9
-rw-r--r--src/indexer/segment_manager.rs105
-rw-r--r--src/indexer/segment_register.rs69
-rw-r--r--src/indexer/segment_serializer.rs10
-rw-r--r--src/indexer/segment_updater.rs218
-rw-r--r--src/indexer/segment_writer.rs110
-rw-r--r--src/lib.rs117
-rw-r--r--src/postings/docset.rs3
-rw-r--r--src/postings/mod.rs78
-rw-r--r--src/postings/postings_writer.rs83
-rw-r--r--src/postings/recorder.rs63
-rw-r--r--src/postings/segment_postings.rs100
-rw-r--r--src/postings/segment_postings_option.rs1
-rw-r--r--src/postings/serializer.rs101
-rw-r--r--src/postings/term_info.rs2
-rw-r--r--src/query/boolean_query/boolean_query.rs17
-rw-r--r--src/query/boolean_query/boolean_scorer.rs10
-rw-r--r--src/query/boolean_query/boolean_weight.rs11
-rw-r--r--src/query/boolean_query/mod.rs26
-rw-r--r--src/query/phrase_query/mod.rs6
-rw-r--r--src/query/phrase_query/phrase_weight.rs6
-rw-r--r--src/query/query.rs5
-rw-r--r--src/query/query_parser/query_grammar.rs53
-rw-r--r--src/query/query_parser/query_parser.rs239
-rw-r--r--src/query/term_query/mod.rs6
-rw-r--r--src/query/term_query/term_scorer.rs12
-rw-r--r--src/query/term_query/term_weight.rs15
-rw-r--r--src/schema/field.rs2
-rw-r--r--src/schema/field_entry.rs20
-rw-r--r--src/schema/field_type.rs13
-rw-r--r--src/schema/schema.rs87
-rw-r--r--src/schema/term.rs10
-rw-r--r--src/schema/text_options.rs14
-rw-r--r--src/schema/value.rs16
-rw-r--r--src/store/mod.rs22
-rw-r--r--src/store/reader.rs8
-rw-r--r--src/store/writer.rs28
-rw-r--r--src/termdict/fstdict/streamer.rs26
-rw-r--r--src/termdict/fstdict/termdict.rs60
-rw-r--r--src/termdict/merger.rs33
-rw-r--r--src/termdict/mod.rs91
-rw-r--r--src/termdict/streamdict/delta_encoder.rs45
-rw-r--r--src/termdict/streamdict/mod.rs3
-rw-r--r--src/termdict/streamdict/streamer.rs69
-rw-r--r--src/termdict/streamdict/termdict.rs122
90 files changed, 2299 insertions, 1803 deletions
diff --git a/examples/simple_search.rs b/examples/simple_search.rs
index 0d35f0e..3cc82ae 100644
--- a/examples/simple_search.rs
+++ b/examples/simple_search.rs
@@ -91,9 +91,11 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
let mut old_man_doc = Document::default();
old_man_doc.add_text(title, "The Old Man and the Sea");
- old_man_doc.add_text(body,
- "He was an old man who fished alone in a skiff in the Gulf Stream and \
- he had gone eighty-four days now without taking a fish.");
+ old_man_doc.add_text(
+ body,
+ "He was an old man who fished alone in a skiff in the Gulf Stream and \
+ he had gone eighty-four days now without taking a fish.",
+ );
// ... and add it to the `IndexWriter`.
index_writer.add_document(old_man_doc);
diff --git a/src/collector/chained_collector.rs b/src/collector/chained_collector.rs
index 6cc5785..1dff3e3 100644
--- a/src/collector/chained_collector.rs
+++ b/src/collector/chained_collector.rs
@@ -38,10 +38,11 @@ impl<Left: Collector, Right: Collector> ChainedCollector<Left, Right> {
}
impl<Left: Collector, Right: Collector> Collector for ChainedCollector<Left, Right> {
- fn set_segment(&mut self,
- segment_local_id: SegmentLocalId,
- segment: &SegmentReader)
- -> Result<()> {
+ fn set_segment(
+ &mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader,
+ ) -> Result<()> {
try!(self.left.set_segment(segment_local_id, segment));
try!(self.right.set_segment(segment_local_id, segment));
Ok(())
diff --git a/src/collector/count_collector.rs b/src/collector/count_collector.rs
index bfb17eb..1fd9613 100644
--- a/src/collector/count_collector.rs
+++ b/src/collector/count_collector.rs
@@ -45,11 +45,11 @@ mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
- let mut count_collector = CountCollector::default();
- for doc in 0..1_000_000 {
- count_collector.collect(doc, 1f32);
- }
- count_collector.count()
- });
+ let mut count_collector = CountCollector::default();
+ for doc in 0..1_000_000 {
+ count_collector.collect(doc, 1f32);
+ }
+ count_collector.count()
+ });
}
}
diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs
index 2d760df..b998220 100644
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -15,8 +15,9 @@ use SegmentLocalId;
/// Facet collector for i64/u64 fast field
pub struct FacetCollector<T>
- where T: FastFieldReader,
- T::ValueType: Eq + Hash
+where
+ T: FastFieldReader,
+ T::ValueType: Eq + Hash,
{
counters: HashMap<T::ValueType, u64>,
field: Field,
@@ -25,8 +26,9 @@ pub struct FacetCollector<T>
impl<T> FacetCollector<T>
- where T: FastFieldReader,
- T::ValueType: Eq + Hash
+where
+ T: FastFieldReader,
+ T::ValueType: Eq + Hash,
{
/// Creates a new facet collector for aggregating a given field.
pub fn new(field: Field) -> FacetCollector<T> {
@@ -40,8 +42,9 @@ impl<T> FacetCollector<T>
impl<T> Collector for FacetCollector<T>
- where T: FastFieldReader,
- T::ValueType: Eq + Hash
+where
+ T: FastFieldReader,
+ T::ValueType: Eq + Hash,
{
fn set_segment(&mut self, _: SegmentLocalId, reader: &SegmentReader) -> Result<()> {
self.ff_reader = Some(reader.get_fast_field_reader(self.field)?);
@@ -51,7 +54,9 @@ impl<T> Collector for FacetCollector<T>
fn collect(&mut self, doc: DocId, _: Score) {
let val = self.ff_reader
.as_ref()
- .expect("collect() was called before set_segment. This should never happen.")
+ .expect(
+ "collect() was called before set_segment. This should never happen.",
+ )
.get(doc);
*(self.counters.entry(val).or_insert(0)) += 1;
}
diff --git a/src/collector/mod.rs b/src/collector/mod.rs
index 2743559..3832abb 100644
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -51,20 +51,22 @@ pub use self::chained_collector::chain;
pub trait Collector {
/// `set_segment` is called before beginning to enumerate
/// on this segment.
- fn set_segment(&mut self,
- segment_local_id: SegmentLocalId,
- segment: &SegmentReader)
- -> Result<()>;
+ fn set_segment(
+ &mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader,
+ ) -> Result<()>;
/// The query pushes the scored document to the collector via this method.
fn collect(&mut self, doc: DocId, score: Score);
}
impl<'a, C: Collector> Collector for &'a mut C {
- fn set_segment(&mut self,
- segment_local_id: SegmentLocalId,
- segment: &SegmentReader)
- -> Result<()> {
+ fn set_segment(
+ &mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader,
+ ) -> Result<()> {
(*self).set_segment(segment_local_id, segment)
}
/// The query pushes the scored document to the collector via this method.
@@ -169,12 +171,12 @@ pub mod tests {
#[bench]
fn build_collector(b: &mut Bencher) {
b.iter(|| {
- let mut count_collector = CountCollector::default();
- let docs: Vec<u32> = (0..1_000_000).collect();
- for doc in docs {
- count_collector.collect(doc, 1f32);
- }
- count_collector.count()
- });
+ let mut count_collector = CountCollector::default();
+ let docs: Vec<u32> = (0..1_000_000).collect();
+ for doc in docs {
+ count_collector.collect(doc, 1f32);
+ }
+ count_collector.count()
+ });
}
}
diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs
index c251578..2e6bf06 100644
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -23,10 +23,11 @@ impl<'a> MultiCollector<'a> {
impl<'a> Collector for MultiCollector<'a> {
- fn set_segment(&mut self,
- segment_local_id: SegmentLocalId,
- segment: &SegmentReader)
- -> Result<()> {
+ fn set_segment(
+ &mut self,
+ segment_local_id: SegmentLocalId,
+ segment: &SegmentReader,
+ ) -> Result<()> {
for collector in &mut self.collectors {
try!(collector.set_segment(segment_local_id, segment));
}
@@ -53,8 +54,8 @@ mod tests {
let mut top_collector = TopCollector::with_limit(2);
let mut count_collector = CountCollector::default();
{
- let mut collectors = MultiCollector::from(vec![&mut top_collector,
- &mut count_collector]);
+ let mut collectors =
+ MultiCollector::from(vec![&mut top_collector, &mut count_collector]);
collectors.collect(1, 0.2);
collectors.collect(2, 0.1);
collectors.collect(3, 0.5);
diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs
index 7d3c33c..e022c4b 100644
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -24,10 +24,9 @@ impl PartialOrd for GlobalScoredDoc {
impl Ord for GlobalScoredDoc {
#[inline]
fn cmp(&self, other: &GlobalScoredDoc) -> Ordering {
- other
- .score
- .partial_cmp(&self.score)
- .unwrap_or_else(|| other.doc_address.cmp(&self.doc_address))
+ other.score.partial_cmp(&self.score).unwrap_or_else(|| {
+ other.doc_address.cmp(&self.doc_address)
+ })
}
}
@@ -87,7 +86,9 @@ impl TopCollector {
scored_docs.sort();
scored_docs
.into_iter()
- .map(|GlobalScoredDoc { score, doc_address }| (score, doc_address))
+ .map(|GlobalScoredDoc { score, doc_address }| {
+ (score, doc_address)
+ })
.collect()
}
@@ -108,14 +109,13 @@ impl Collector for TopCollector {
fn collect(&mut self, doc: DocId, score: Score) {
if self.at_capacity() {
// It's ok to unwrap as long as a limit of 0 is forbidden.
- let limit_doc: GlobalScoredDoc =
- *self.heap
- .peek()
- .expect("Top collector with size 0 is forbidden");
+ let limit_doc: GlobalScoredDoc = *self.heap.peek().expect(
+ "Top collector with size 0 is forbidden",
+ );
if limit_doc.score < score {
- let mut mut_head = self.heap
- .peek_mut()
- .expect("Top collector with size 0 is forbidden");
+ let mut mut_head = self.heap.peek_mut().expect(
+ "Top collector with size 0 is forbidden",
+ );
mut_head.score = score;
mut_head.doc_address = DocAddress(self.segment_id, doc);
}
diff --git a/src/common/bitpacker.rs b/src/common/bitpacker.rs
index 7d7aeb2..a900ae9 100644
--- a/src/common/bitpacker.rs
+++ b/src/common/bitpacker.rs
@@ -88,7 +88,8 @@ impl BitPacker {
pub struct BitUnpacker<Data>
- where Data: Deref<Target = [u8]>
+where
+ Data: Deref<Target = [u8]>,
{
num_bits: usize,
mask: u64,
@@ -96,7 +97,8 @@ pub struct BitUnpacker<Data>
}
impl<Data> BitUnpacker<Data>
- where Data: Deref<Target = [u8]>
+where
+ Data: Deref<Target = [u8]>,
{
pub fn new(data: Data, num_bits: usize) -> BitUnpacker<Data> {
let mask: u64 = if num_bits == 64 {
@@ -121,8 +123,10 @@ impl<Data> BitUnpacker<Data>
let addr_in_bits = idx * num_bits;
let addr = addr_in_bits >> 3;
let bit_shift = addr_in_bits & 7;
- debug_assert!(addr + 8 <= data.len(),
- "The fast field field should have been padded with 7 bytes.");
+ debug_assert!(
+ addr + 8 <= data.len(),
+ "The fast field field should have been padded with 7 bytes."
+ );
let val_unshifted_unmasked: u64 = unsafe { *(data[addr..].as_ptr() as *const u64) };
let val_shifted = (val_unshifted_unmasked >> bit_shift) as u64;
(val_shifted & mask)
diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs
index bc0d407..4ab843d 100644
--- a/src/common/composite_file.rs
+++ b/src/common/composite_file.rs
@@ -10,13 +10,12 @@ use common::BinarySerializable;
/// A `CompositeWrite` is used to write a `CompositeFile`.
-pub struct CompositeWrite<W=WritePtr> {
+pub struct CompositeWrite<W = WritePtr> {
write: CountingWriter<W>,
offsets: HashMap<Field, usize>,
}
impl<W: Write> CompositeWrite<W> {
-
/// Crate a new API writer that writes a composite file
/// in a given write.
pub fn wrap(w: W) -> CompositeWrite<W> {
@@ -43,7 +42,8 @@ impl<W: Write> CompositeWrite<W> {
let footer_offset = self.write.written_bytes();
VInt(self.offsets.len() as u64).serialize(&mut self.write)?;
- let mut offset_fields: Vec<_> = self.offsets.iter()
+ let mut offset_fields: Vec<_> = self.offsets
+ .iter()
.map(|(field, offset)| (offset, field))
.collect();
@@ -51,7 +51,9 @@ impl<W: Write> CompositeWrite<W> {
let mut prev_offset = 0;
for (offset, field) in offset_fields {
- VInt( (offset - prev_offset) as u64).serialize(&mut self.write)?;
+ VInt((offset - prev_offset) as u64).serialize(
+ &mut self.write,
+ )?;
field.serialize(&mut self.write)?;
prev_offset = *offset;
}
@@ -77,7 +79,6 @@ pub struct CompositeFile {
}
impl CompositeFile {
-
/// Opens a composite file stored in a given
/// `ReadOnlySource`.
pub fn open(data: ReadOnlySource) -> io::Result<CompositeFile> {
@@ -90,8 +91,8 @@ impl CompositeFile {
let mut footer_buffer = footer_data.as_slice();
let num_fields = VInt::deserialize(&mut footer_buffer)?.0 as usize;
- let mut fields = vec!();
- let mut offsets = vec!();
+ let mut fields = vec![];
+ let mut offsets = vec![];
let mut field_index = HashMap::new();
@@ -106,7 +107,7 @@ impl CompositeFile {
for i in 0..num_fields {
let field = fields[i];
let start_offset = offsets[i];
- let end_offset = offsets[i+1];
+ let end_offset = offsets[i + 1];
field_index.insert(field, (start_offset, end_offset));
}
@@ -128,11 +129,9 @@ impl CompositeFile {
/// Returns the `ReadOnlySource` associated
/// to a given `Field` and stored in a `CompositeFile`.
pub fn open_read(&self, field: Field) -> Option<ReadOnlySource> {
- self.offsets_index
- .get(&field)
- .map(|&(from, to)| {
- self.data.slice(from, to)
- })
+ self.offsets_index.get(&field).map(|&(from, to)| {
+ self.data.slice(from, to)
+ })
}
}
@@ -189,4 +188,4 @@ mod test {
}