author    Paul Masurel <paul.masurel@gmail.com>  2018-09-15 07:44:22 +0900
committer GitHub <noreply@github.com>            2018-09-15 07:44:22 +0900
commit    37e4280c0a62943f70b3cfbf83c72fd10e494973 (patch)
tree      d8f85aaa347b520a8156f06307dae13c56322e83 /src
parent    0ba1cf93f7049b881a5b6d19dc953a28322bc250 (diff)
Cargo Format (#420)
Diffstat (limited to 'src')
-rw-r--r--  src/collector/facet_collector.rs | 25
-rw-r--r--  src/collector/top_collector.rs | 6
-rw-r--r--  src/collector/top_field_collector.rs | 18
-rw-r--r--  src/collector/top_score_collector.rs | 4
-rw-r--r--  src/common/composite_file.rs | 3
-rw-r--r--  src/common/vint.rs | 16
-rw-r--r--  src/core/index.rs | 72
-rw-r--r--  src/core/inverted_index_reader.rs | 10
-rw-r--r--  src/core/pool.rs | 6
-rw-r--r--  src/core/searcher.rs | 17
-rw-r--r--  src/core/segment_reader.rs | 19
-rw-r--r--  src/directory/directory.rs | 12
-rw-r--r--  src/directory/managed_directory.rs | 15
-rw-r--r--  src/directory/mmap_directory.rs | 6
-rw-r--r--  src/directory/ram_directory.rs | 10
-rw-r--r--  src/directory/read_only_source.rs | 1
-rw-r--r--  src/error.rs | 16
-rw-r--r--  src/fastfield/delete.rs | 3
-rw-r--r--  src/fastfield/facet_reader.rs | 3
-rw-r--r--  src/fastfield/multivalued/writer.rs | 4
-rw-r--r--  src/functional_test.rs | 2
-rw-r--r--  src/indexer/delete_queue.rs | 12
-rw-r--r--  src/indexer/directory_lock.rs | 51
-rw-r--r--  src/indexer/index_writer.rs | 13
-rw-r--r--  src/indexer/merge_policy.rs | 12
-rw-r--r--  src/indexer/merger.rs | 3
-rw-r--r--  src/indexer/segment_register.rs | 3
-rwxr-xr-x  src/lib.rs | 5
-rw-r--r--  src/positions/mod.rs | 28
-rw-r--r--  src/positions/reader.rs | 55
-rw-r--r--  src/positions/serializer.rs | 9
-rw-r--r--  src/postings/compression/mod.rs | 27
-rw-r--r--  src/postings/compression/vint.rs | 12
-rw-r--r--  src/postings/mod.rs | 17
-rw-r--r--  src/postings/postings_writer.rs | 3
-rw-r--r--  src/postings/recorder.rs | 3
-rw-r--r--  src/postings/segment_postings.rs | 156
-rw-r--r--  src/postings/serializer.rs | 48
-rw-r--r--  src/postings/skip.rs | 27
-rw-r--r--  src/query/boolean_query/boolean_query.rs | 6
-rw-r--r--  src/query/empty_query.rs | 8
-rw-r--r--  src/query/mod.rs | 4
-rw-r--r--  src/query/occur.rs | 2
-rw-r--r--  src/query/phrase_query/phrase_query.rs | 2
-rw-r--r--  src/query/phrase_query/phrase_scorer.rs | 3
-rw-r--r--  src/query/phrase_query/phrase_weight.rs | 1
-rw-r--r--  src/query/query.rs | 2
-rw-r--r--  src/query/query_parser/query_grammar.rs | 42
-rw-r--r--  src/query/query_parser/query_parser.rs | 121
-rw-r--r--  src/query/query_parser/user_input_ast.rs | 24
-rw-r--r--  src/query/range_query.rs | 5
-rw-r--r--  src/query/scorer.rs | 1
-rw-r--r--  src/query/term_query/term_query.rs | 2
-rw-r--r--  src/schema/schema.rs | 5
-rw-r--r--  src/snippet/mod.rs | 82
-rw-r--r--  src/store/mod.rs | 8
-rw-r--r--  src/store/skiplist/skiplist_builder.rs | 3
-rw-r--r--  src/termdict/term_info_store.rs | 1
-rw-r--r--  src/termdict/termdict.rs | 3
-rw-r--r--  src/tokenizer/lower_caser.rs | 29
-rw-r--r--  src/tokenizer/mod.rs | 2
-rw-r--r--  src/tokenizer/raw_tokenizer.rs | 2
-rw-r--r--  src/tokenizer/token_stream_chain.rs | 13
-rw-r--r--  src/tokenizer/tokenizer.rs | 2
64 files changed, 593 insertions, 532 deletions
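
Editor's note: the hunks below are almost entirely mechanical rustfmt rewrites: import blocks are regrouped and sorted, stray blank lines are dropped, operators gain spacing, and long method chains are split so the receiver sits alone on its own line. That last rule is the most visible one throughout. A self-contained sketch of the before/after (the types here are stand-ins, not tantivy's real ones):

    struct SegmentCounter {
        facet_ords: Vec<u64>,
    }

    struct Counters {
        segment_counters: Vec<SegmentCounter>,
    }

    impl Counters {
        fn collapsed_ords(&self) -> Vec<&[u64]> {
            // Pre-rustfmt this chain began `let ords: Vec<&[u64]> = self.segment_counters`;
            // rustfmt moves the receiver onto its own line so each link aligns.
            let ords: Vec<&[u64]> = self
                .segment_counters
                .iter()
                .map(|counter| &counter.facet_ords[..])
                .collect();
            ords
        }
    }
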
diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs
index 6c0bb64..8e1c958 100644
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -342,16 +342,19 @@ impl FacetCollector {
pub fn harvest(mut self) -> FacetCounts {
self.finalize_segment();
- let collapsed_facet_ords: Vec<&[u64]> = self.segment_counters
+ let collapsed_facet_ords: Vec<&[u64]> = self
+ .segment_counters
.iter()
.map(|segment_counter| &segment_counter.facet_ords[..])
.collect();
- let collapsed_facet_counts: Vec<&[u64]> = self.segment_counters
+ let collapsed_facet_counts: Vec<&[u64]> = self
+ .segment_counters
.iter()
.map(|segment_counter| &segment_counter.facet_counts[..])
.collect();
- let facet_streams = self.segment_counters
+ let facet_streams = self
+ .segment_counters
.iter()
.map(|seg_counts| seg_counts.facet_reader.facet_dict().range().into_stream())
.collect::<Vec<_>>();
@@ -402,7 +405,8 @@ impl Collector for FacetCollector {
fn collect(&mut self, doc: DocId, _: Score) {
let facet_reader: &mut FacetReader = unsafe {
- &mut *self.ff_reader
+ &mut *self
+ .ff_reader
.as_ref()
.expect("collect() was called before set_segment. This should never happen.")
.get()
@@ -476,9 +480,8 @@ impl FacetCounts {
heap.push(Hit { count, facet });
}
- let mut lowest_count: u64 = heap.peek().map(|hit| hit.count)
- .unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value
- // is never used in that case.
+ let mut lowest_count: u64 = heap.peek().map(|hit| hit.count).unwrap_or(u64::MIN); //< the `unwrap_or` case may be triggered but the value
+ // is never used in that case.
for (facet, count) in it {
if count > lowest_count {
@@ -619,7 +622,13 @@ mod tests {
let doc = doc!(facet_field => facet);
iter::repeat(doc).take(count)
})
- .map(|mut doc| { doc.add_facet(facet_field, &format!("/facet/{}", thread_rng().sample(&uniform) )); doc})
+ .map(|mut doc| {
+ doc.add_facet(
+ facet_field,
+ &format!("/facet/{}", thread_rng().sample(&uniform)),
+ );
+ doc
+ })
.collect();
thread_rng().shuffle(&mut docs[..]);
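
Editor's note: the `lowest_count` logic in the harvest hunk above is the standard bounded-heap top-k pattern, where peeking at the weakest retained entry lets most candidates be rejected without touching the heap. A minimal standalone sketch (the `top_k` helper is mine, not part of this commit), using `std::cmp::Reverse` to turn `BinaryHeap`'s max-heap into a min-heap:

    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    /// Keep the k largest counts seen so far. The min-heap exposes the
    /// smallest retained count in O(1) via peek(), playing the role of
    /// `lowest_count` in the collector above.
    fn top_k(counts: impl IntoIterator<Item = u64>, k: usize) -> Vec<u64> {
        let mut heap: BinaryHeap<Reverse<u64>> = BinaryHeap::with_capacity(k);
        for count in counts {
            if heap.len() < k {
                heap.push(Reverse(count));
            } else if let Some(&Reverse(lowest)) = heap.peek() {
                if count > lowest {
                    heap.pop();
                    heap.push(Reverse(count));
                }
            }
        }
        let mut out: Vec<u64> = heap.into_iter().map(|Reverse(c)| c).collect();
        out.sort_unstable_by(|a, b| b.cmp(a));
        out
    }

For example, `top_k(vec![3, 9, 1, 7, 5], 3)` returns `[9, 7, 5]`.
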
diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs
index 64d2eee..6cb61e8 100644
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -1,8 +1,8 @@
+use std::cmp::Ordering;
+use std::collections::BinaryHeap;
use DocAddress;
use DocId;
use SegmentLocalId;
-use std::cmp::Ordering;
-use std::collections::BinaryHeap;
/// Contains a feature (field, score, etc.) of a document along with the document address.
///
@@ -139,9 +139,9 @@ impl<T: PartialOrd + Clone> TopCollector<T> {
#[cfg(test)]
mod tests {
+ use super::*;
use DocId;
use Score;
- use super::*;
#[test]
fn test_top_collector_not_at_capacity() {
diff --git a/src/collector/top_field_collector.rs b/src/collector/top_field_collector.rs
index ec2361b..3fb95d2 100644
--- a/src/collector/top_field_collector.rs
+++ b/src/collector/top_field_collector.rs
@@ -1,13 +1,13 @@
+use super::Collector;
use collector::top_collector::TopCollector;
-use DocAddress;
-use DocId;
use fastfield::FastFieldReader;
use fastfield::FastValue;
+use schema::Field;
+use DocAddress;
+use DocId;
use Result;
use Score;
use SegmentReader;
-use super::Collector;
-use schema::Field;
/// The Top Field Collector keeps track of the K documents
/// sorted by a fast field in the index
@@ -142,16 +142,16 @@ impl<T: FastValue + PartialOrd + Clone> Collector for TopFieldCollector<T> {
#[cfg(test)]
mod tests {
- use Index;
- use IndexWriter;
- use TantivyError;
+ use super::*;
use query::Query;
use query::QueryParser;
- use schema::{FAST, SchemaBuilder, TEXT};
use schema::Field;
use schema::IntOptions;
use schema::Schema;
- use super::*;
+ use schema::{SchemaBuilder, FAST, TEXT};
+ use Index;
+ use IndexWriter;
+ use TantivyError;
const TITLE: &str = "title";
const SIZE: &str = "size";
diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs
index 4a8ace8..68bf114 100644
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -1,3 +1,4 @@
+use super::Collector;
use collector::top_collector::TopCollector;
use DocAddress;
use DocId;
@@ -5,7 +6,6 @@ use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
-use super::Collector;
/// The Top Score Collector keeps track of the K documents
/// sorted by their score.
@@ -131,10 +131,10 @@ impl Collector for TopScoreCollector {
#[cfg(test)]
mod tests {
+ use super::*;
use collector::Collector;
use DocId;
use Score;
- use super::*;
#[test]
fn test_top_collector_not_at_capacity() {
diff --git a/src/common/composite_file.rs b/src/common/composite_file.rs
index 2f3f71a..e7d657b 100644
--- a/src/common/composite_file.rs
+++ b/src/common/composite_file.rs
@@ -72,7 +72,8 @@ impl<W: Write> CompositeWrite<W> {
let footer_offset = self.write.written_bytes();
VInt(self.offsets.len() as u64).serialize(&mut self.write)?;
- let mut offset_fields: Vec<_> = self.offsets
+ let mut offset_fields: Vec<_> = self
+ .offsets
.iter()
.map(|(file_addr, offset)| (*offset, *file_addr))
.collect();
diff --git a/src/common/vint.rs b/src/common/vint.rs
index 308aff1..7b782a9 100644
--- a/src/common/vint.rs
+++ b/src/common/vint.rs
@@ -10,8 +10,6 @@ pub struct VInt(pub u64);
const STOP_BIT: u8 = 128;
impl VInt {
-
-
pub fn val(&self) -> u64 {
self.0
}
@@ -20,14 +18,13 @@ impl VInt {
VInt::deserialize(reader).map(|vint| vint.0)
}
- pub fn serialize_into_vec(&self, output: &mut Vec<u8>){
+ pub fn serialize_into_vec(&self, output: &mut Vec<u8>) {
let mut buffer = [0u8; 10];
let num_bytes = self.serialize_into(&mut buffer);
output.extend(&buffer[0..num_bytes]);
}
fn serialize_into(&self, buffer: &mut [u8; 10]) -> usize {
-
let mut remaining = self.0;
for (i, b) in buffer.iter_mut().enumerate() {
let next_byte: u8 = (remaining % 128u64) as u8;
@@ -74,7 +71,6 @@ impl BinarySerializable for VInt {
}
}
-
#[cfg(test)]
mod tests {
@@ -89,10 +85,10 @@ mod tests {
}
assert!(num_bytes > 0);
if num_bytes < 10 {
- assert!(1u64 << (7*num_bytes) > val);
+ assert!(1u64 << (7 * num_bytes) > val);
}
if num_bytes > 1 {
- assert!(1u64 << (7*(num_bytes-1)) <= val);
+ assert!(1u64 << (7 * (num_bytes - 1)) <= val);
}
let serdeser_val = VInt::deserialize(&mut &v[..]).unwrap();
assert_eq!(val, serdeser_val.0);
@@ -105,11 +101,11 @@ mod tests {
aux_test_vint(5);
aux_test_vint(u64::max_value());
for i in 1..9 {
- let power_of_128 = 1u64 << (7*i);
+ let power_of_128 = 1u64 << (7 * i);
aux_test_vint(power_of_128 - 1u64);
- aux_test_vint(power_of_128 );
+ aux_test_vint(power_of_128);
aux_test_vint(power_of_128 + 1u64);
}
aux_test_vint(10);
}
-}
\ No newline at end of file
+}
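
Editor's note: for context on what vint.rs encodes, `STOP_BIT = 128` marks the final byte of a variable-length integer that carries seven payload bits per byte, which is exactly the bound the reformatted test asserts with `1u64 << (7 * num_bytes) > val`. A hedged sketch of the scheme (function names are mine; tantivy's own implementation lives in `serialize_into`/`deserialize`):

    const STOP_BIT: u8 = 128;

    // Seven payload bits per byte, least-significant group first; the stop
    // bit (0x80) flags the last byte, matching `(remaining % 128u64) as u8`
    // in the hunk above.
    fn vint_encode(mut val: u64, output: &mut Vec<u8>) {
        loop {
            let byte = (val % 128) as u8;
            val /= 128;
            if val == 0 {
                output.push(byte | STOP_BIT);
                return;
            }
            output.push(byte);
        }
    }

    // Returns the decoded value and the number of bytes consumed, or None
    // if the input ends before a stop bit is seen.
    fn vint_decode(bytes: &[u8]) -> Option<(u64, usize)> {
        let mut result = 0u64;
        for (i, &b) in bytes.iter().enumerate() {
            result |= u64::from(b & 127) << (7 * i);
            if b & STOP_BIT != 0 {
                return Some((result, i + 1));
            }
        }
        None
    }

Encoding 300 yields the two bytes [44, 130]: 300 = 44 + 2·128, with the stop bit set on the second byte.
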
diff --git a/src/core/index.rs b/src/core/index.rs
index a7bef84..3eafb90 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -1,36 +1,36 @@
-use core::SegmentId;
-use error::TantivyError;
-use schema::Schema;
-use serde_json;
-use std::borrow::BorrowMut;
-use std::fmt;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::Arc;
-use Result;
-use indexer::LockType;
use super::pool::LeasedItem;
use super::pool::Pool;
use super::segment::create_segment;
use super::segment::Segment;
use core::searcher::Searcher;
use core::IndexMeta;
+use core::SegmentId;
use core::SegmentMeta;
use core::SegmentReader;
use core::META_FILEPATH;
+use directory::ManagedDirectory;
#[cfg(feature = "mmap")]
use directory::MmapDirectory;
use directory::{Directory, RAMDirectory};
-use directory::{ManagedDirectory};
+use error::TantivyError;
use indexer::index_writer::open_index_writer;
use indexer::index_writer::HEAP_SIZE_MIN;
use indexer::segment_updater::save_new_metas;
+use indexer::LockType;
use num_cpus;
+use schema::Field;
+use schema::FieldType;
+use schema::Schema;
+use serde_json;
+use std::borrow::BorrowMut;
+use std::fmt;
use std::path::Path;
+use std::sync::atomic::{AtomicUsize, Ordering};
+use std::sync::Arc;
+use tokenizer::BoxedTokenizer;
use tokenizer::TokenizerManager;
use IndexWriter;
-use schema::FieldType;
-use schema::Field;
-use tokenizer::BoxedTokenizer;
+use Result;
fn load_metas(directory: &Directory) -> Result<IndexMeta> {
let meta_data = directory.atomic_read(&META_FILEPATH)?;
@@ -115,31 +115,24 @@ impl Index {
&self.tokenizers
}
-
/// Helper to access the tokenizer associated to a specific field.
pub fn tokenizer_for_field(&self, field: Field) -> Result<Box<BoxedTokenizer>> {
let field_entry = self.schema.get_field_entry(field);
let field_type = field_entry.field_type();
let tokenizer_manager: &TokenizerManager = self.tokenizers();
- let tokenizer_name_opt: Option<Box<BoxedTokenizer>> =
- match field_type {
- FieldType::Str(text_options) => {
- text_options
- .get_indexing_options()
- .map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
- .and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name))
- },
- _ => {
- None
- }
- };
+ let tokenizer_name_opt: Option<Box<BoxedTokenizer>> = match field_type {
+ FieldType::Str(text_options) => text_options
+ .get_indexing_options()
+ .map(|text_indexing_options| text_indexing_options.tokenizer().to_string())
+ .and_then(|tokenizer_name| tokenizer_manager.get(&tokenizer_name)),
+ _ => None,
+ };
match tokenizer_name_opt {
- Some(tokenizer) => {
- Ok(tokenizer)
- }
- None => {
- Err(TantivyError:: SchemaError(format!("{:?} is not a text field.", field_entry.name())))
- }
+ Some(tokenizer) => Ok(tokenizer),
+ None => Err(TantivyError::SchemaError(format!(
+ "{:?} is not a text field.",
+ field_entry.name()
+ ))),
}
}
@@ -186,7 +179,6 @@ impl Index {
num_threads: usize,
overall_heap_size_in_bytes: usize,
) -> Result<IndexWriter> {
-
let directory_lock = LockType::IndexWriterLock.acquire_lock(&self.directory)?;
let heap_size_in_bytes_per_thread = overall_heap_size_in_bytes / num_threads;
open_index_writer(
@@ -225,7 +217,8 @@ impl Index {
/// Returns the list of segments that are searchable
pub fn searchable_segments(&self) -> Result<Vec<Segment>> {
- Ok(self.searchable_segment_metas()?
+ Ok(self
+ .searchable_segment_metas()?
.into_iter()
.map(|segment_meta| self.segment(segment_meta))
.collect())
@@ -260,7 +253,8 @@ impl Index {
/// Returns the list of segment ids that are searchable.
pub fn searchable_segment_ids(&self) -> Result<Vec<SegmentId>> {
- Ok(self.searchable_segment_metas()?
+ Ok(self
+ .searchable_segment_metas()?
.iter()
.map(|segment_meta| segment_meta.id())
.collect())
@@ -332,11 +326,10 @@ impl Clone for Index {
}
}
-
#[cfg(test)]
mod tests {
+ use schema::{SchemaBuilder, INT_INDEXED, TEXT};
use Index;
- use schema::{SchemaBuilder, TEXT, INT_INDEXED};
#[test]
fn test_indexer_for_field() {
@@ -352,5 +345,4 @@ mod tests {
);
}
-
-}
\ No newline at end of file
+}
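
Editor's note: the reformatted `tokenizer_for_field` resolves a tokenizer only for text fields and returns the `SchemaError` built above otherwise; the `test_indexer_for_field` test in this hunk exercises exactly that. A sketch of the call shape it implies (assuming the era's `SchemaBuilder`/`Index::create_in_ram` API, as used by that test):

    use tantivy::schema::{SchemaBuilder, INT_INDEXED, TEXT};
    use tantivy::Index;

    fn main() {
        let mut schema_builder = SchemaBuilder::default();
        let title = schema_builder.add_text_field("title", TEXT);
        let count = schema_builder.add_u64_field("count", INT_INDEXED);
        let index = Index::create_in_ram(schema_builder.build());
        // A text field resolves to its registered tokenizer...
        assert!(index.tokenizer_for_field(title).is_ok());
        // ...while a non-text field hits the `None => Err(SchemaError)` arm.
        assert!(index.tokenizer_for_field(count).is_err());
    }
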
diff --git a/src/core/inverted_index_reader.rs b/src/core/inverted_index_reader.rs
index b919e09..bb71be1 100644
--- a/src/core/inverted_index_reader.rs
+++ b/src/core/inverted_index_reader.rs
@@ -1,13 +1,13 @@
use common::BinarySerializable;
use directory::ReadOnlySource;
+use owned_read::OwnedRead;
+use positions::PositionReader;
use postings::TermInfo;
use postings::{BlockSegmentPostings, SegmentPostings};
use schema::FieldType;
use schema::IndexRecordOption;
use schema::Term;
use termdict::TermDictionary;
-use owned_read::OwnedRead;
-use positions::PositionReader;
/// The inverted index reader is in charge of accessing
/// the inverted index associated to a specific field.
@@ -100,7 +100,6 @@ impl InvertedIndexReader {
block_postings.reset(term_info.doc_freq, postings_reader);
}
-
/// Returns a block postings given a `Term`.
/// This method is for an advanced usage only.
///
@@ -111,7 +110,7 @@ impl InvertedIndexReader {
option: IndexRecordOption,
) -> Option<BlockSegmentPostings> {
self.get_term_info(term)
- .map(move|term_info| self.read_block_postings_from_terminfo(&term_info, option))
+ .map(move |term_info| self.read_block_postings_from_terminfo(&term_info, option))
}
/// Returns a block postings given a `term_info`.
@@ -147,7 +146,8 @@ impl InvertedIndexReader {
if option.has_positions() {
let position_reader = self.positions_source.clone();
let skip_reader = self.positions_idx_source.clone();
- let position_reader = PositionReader::new(position_reader, skip_reader, term_info.positions_idx);
+ let position_reader =
+ PositionReader::new(position_reader, skip_reader, term_info.positions_idx);
Some(position_reader)
} else {
None
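
Editor's note: the hunk above only builds a `PositionReader` when `option.has_positions()`, i.e. when the caller asked for positions via its `IndexRecordOption`. A hypothetical call shape (the `inv_index` and `term` bindings are assumed, not from this diff; `read_block_postings` is the method whose closure spacing is fixed above):

    // Positions are only decoded when explicitly requested; with
    // IndexRecordOption::Basic the PositionReader above is never built.
    if let Some(block_postings) =
        inv_index.read_block_postings(&term, IndexRecordOption::WithFreqsAndPositions)
    {
        // iterate blocks of postings, positions included ...
    }
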
diff --git a/src/core/pool.rs b/src/core/pool.rs
index 6098483..d8564e4 100644
--- a/src/core/pool.rs
+++ b/src/core/pool.rs
@@ -87,7 +87,8 @@ impl<T> Deref for LeasedItem<T> {
type Target = T;
fn deref(&self) -> &T {
- &self.gen_item
+ &self
+ .gen_item
.as_ref()
.expect("Unwrapping a leased item should never fail")
.item // unwrap is safe here
@@ -96,7 +97,8 @@ impl<T> Deref for LeasedItem<T> {
impl<T> DerefMut for LeasedItem<T> {
fn deref_mut(&mut self) -> &mut T {
- &mut self.gen_item
+ &mut self
+ .gen_item
.as_mut()
.expect("Unwrapping a mut leased item should never fail")
.item // unwrap is safe here
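
Editor's note: the `Option` indirection that forces these `Deref` impls through an `expect` is not accidental. The pool's `Drop` has to move the item back out, and a field cannot be moved out of a value that implements `Drop` except via `Option::take`. A generic sketch of the pattern (this is the shape of the code above, not tantivy's actual `Pool`):

    use std::ops::{Deref, DerefMut};

    struct Leased<T> {
        item: Option<T>, // Some(..) for the whole lease; None only during drop
    }

    impl<T> Deref for Leased<T> {
        type Target = T;
        fn deref(&self) -> &T {
            self.item
                .as_ref()
                .expect("a leased item is only None after drop")
        }
    }

    impl<T> DerefMut for Leased<T> {
        fn deref_mut(&mut self) -> &mut T {
            self.item
                .as_mut()
                .expect("a leased item is only None after drop")
        }
    }

    impl<T> Drop for Leased<T> {
        fn drop(&mut self) {
            if let Some(item) = self.item.take() {
                // the real pool would re-enqueue `item` for the next lease here
                drop(item);
            }
        }
    }
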
diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index f17df04..cbe5490 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -9,8 +9,8 @@ use std::fmt;
use std::sync::Arc;
use termdict::TermMerger;
use DocAddress;
-use Result;
use Index;
+use Result;
/// Holds a list of `SegmentReader`s ready for search.
///
@@ -25,7 +25,11 @@ pub struct Searcher {
impl Searcher {
/// Creates a new `Searcher`
- pub(crate) fn new(schema: Schema, index: Index, segment_readers: Vec<SegmentReader>) -> Searcher {
+ pub(crate) fn new(
+ schema: Schema,
+ index: Index,
+ segment_readers: Vec<SegmentReader>,
+ ) -> Searcher {
Searcher {
schema,
index,
@@ -87,7 +91,8 @@ impl Searcher {
/// Return the field searcher associated to a `Field`.
pub fn field(&self, field: Field) -> FieldSearcher {
- let inv_index_readers = self.segment_readers
+ let inv_index_readers = self
+ .segment_readers
.iter()
.map(|segment_reader| segment_reader.inverted_index(field))
.collect::<Vec<_>>();
@@ -107,7 +112,8 @@ impl FieldSearcher {
/// Returns a Stream over all of the sorted unique terms of
/// for the given field.
pub fn terms(&self) -> TermMerger {
- let term_streamers: Vec<_> = self.inv_index_readers
+ let term_streamers: Vec<_> = self
+ .inv_index_readers
.iter()
.map(|inverted_index| inverted_index.terms().stream())
.collect();
@@ -117,7 +123,8 @@ impl FieldSearcher {
impl fmt::Debug for Searcher {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let segment_ids = self.segment_readers
+ let segment_ids = self
+ .segment_readers
.iter()
.map(|segment_reader| segment_reader.segment_id())
.collect::<Vec<_>>();
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index 56a3a7b..dff6cca 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -157,11 +157,13 @@ impl SegmentReader {
&FieldType::Bytes => {}
_ => return Err(FastFieldNotAvailableError::new(field_entry)),
}
- let idx_reader = self.fast_fields_composite
+ let idx_reader = self
+ .fast_fields_composite
.open_read_with_idx(field, 0)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
.map(FastFieldReader::open)?;
- let values = self.fast_fields_composite
+ let values = self
+ .fast_fields_composite
.open_read_with_idx(field, 1)
.ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?;
Ok(BytesFastFieldReader::open(idx_reader, values))
@@ -285,7 +287,8 @@ impl SegmentReader {
/// term dictionary associated to a specific field,
/// and opening the posting list associated to any term.
pub fn inverted_index(&self, field: Field) -> Arc<InvertedIndexReader> {
- if let Some(inv_idx_reader) = self.inv_idx_reader_cache
+ if let Some(inv_idx_reader) = self
+ .inv_idx_reader_cache
.read()
.expect("Lock poisoned. This should never happen")
.get(&field)
@@ -314,15 +317,18 @@ impl SegmentReader {
let postings_source = postings_source_opt.unwrap();
- let termdict_source = self.termdict_composite