author    Paul Masurel <paul.masurel@gmail.com>    2018-05-07 19:50:35 -0700
committer Paul Masurel <paul.masurel@gmail.com>    2018-05-07 19:50:35 -0700
commit    9a0b7f9855f34eb772358e0cc1d00b4fa937c781 (patch)
tree      bb3c31aa5a7e39dc6a81d6aaf987e7572563d731
parent    8e343b1ca3915b54134214c60b45d47effce60d6 (diff)
Rustfmt
-rw-r--r--  examples/custom_tokenizer.rs         378
-rw-r--r--  src/collector/mod.rs                   2
-rw-r--r--  src/compression/mod.rs                 6
-rw-r--r--  src/core/segment_reader.rs             4
-rw-r--r--  src/datastruct/stacker/hashmap.rs      8
-rw-r--r--  src/fastfield/bytes/mod.rs             2
-rw-r--r--  src/fastfield/bytes/reader.rs          7
-rw-r--r--  src/fastfield/bytes/writer.rs         13
-rw-r--r--  src/fastfield/mod.rs                   2
-rw-r--r--  src/fastfield/multivalued/writer.rs    2
-rw-r--r--  src/fastfield/serializer.rs            5
-rw-r--r--  src/fastfield/writer.rs                9
-rw-r--r--  src/fieldnorm/writer.rs                3
-rw-r--r--  src/indexer/merger.rs                286
-rw-r--r--  src/postings/postings_writer.rs       20
-rw-r--r--  src/postings/serializer.rs            11
-rw-r--r--  src/query/all_query.rs                25
-rw-r--r--  src/schema/field_entry.rs              2
-rw-r--r--  src/schema/field_type.rs              25
-rw-r--r--  src/schema/term.rs                     2
-rw-r--r--  src/termdict/merger.rs                19
-rw-r--r--  src/termdict/mod.rs                    1
-rw-r--r--  src/termdict/term_info_store.rs        2
-rw-r--r--  src/tokenizer/ngram_tokenizer.rs     176
-rw-r--r--  src/tokenizer/tokenizer.rs            24
25 files changed, 514 insertions, 520 deletions
diff --git a/examples/custom_tokenizer.rs b/examples/custom_tokenizer.rs
index 4f498df..9ddd17e 100644
--- a/examples/custom_tokenizer.rs
+++ b/examples/custom_tokenizer.rs
@@ -13,99 +13,99 @@ use tantivy::Index;
use tempdir::TempDir;
fn main() {
- // Let's create a temporary directory for the
- // sake of this example
- if let Ok(dir) = TempDir::new("tantivy_token_example_dir") {
- run_example(dir.path()).unwrap();
- dir.close().unwrap();
- }
+ // Let's create a temporary directory for the
+ // sake of this example
+ if let Ok(dir) = TempDir::new("tantivy_token_example_dir") {
+ run_example(dir.path()).unwrap();
+ dir.close().unwrap();
+ }
}
fn run_example(index_path: &Path) -> tantivy::Result<()> {
- // # Defining the schema
- //
- // The Tantivy index requires a very strict schema.
- // The schema declares which fields are in the index,
- // and for each field, its type and "the way it should
- // be indexed".
-
- // first we need to define a schema ...
- let mut schema_builder = SchemaBuilder::default();
-
- // Our first field is title.
- // In this example we want to use NGram searching
- // we will set that to 3 characters, so any three
- // char in the title should be findable.
- let text_field_indexing = TextFieldIndexing::default()
- .set_tokenizer("ngram3")
- .set_index_option(IndexRecordOption::WithFreqsAndPositions);
- let text_options = TextOptions::default()
- .set_indexing_options(text_field_indexing)
- .set_stored();
- schema_builder.add_text_field("title", text_options);
-
- // Our second field is body.
- // We want full-text search for it, but we do not
- // need to be able to be able to retrieve it
- // for our application.
- //
- // We can make our index lighter and
- // by omitting `STORED` flag.
- schema_builder.add_text_field("body", TEXT);
-
- let schema = schema_builder.build();
-
- // # Indexing documents
- //
- // Let's create a brand new index.
- //
- // This will actually just save a meta.json
- // with our schema in the directory.
- let index = Index::create(index_path, schema.clone())?;
-
- // here we are registering our custome tokenizer
- // this will store tokens of 3 characters each
- index
- .tokenizers()
- .register("ngram3", NgramTokenizer::new(3, 3, false));
-
- // To insert document we need an index writer.
- // There must be only one writer at a time.
- // This single `IndexWriter` is already
- // multithreaded.
- //
- // Here we use a buffer of 50MB per thread. Using a bigger
- // heap for the indexer can increase its throughput.
- let mut index_writer = index.writer(50_000_000)?;
-
- // Let's index our documents!
- // We first need a handle on the title and the body field.
-
- // ### Create a document "manually".
- //
- // We can create a document manually, by setting the fields
- // one by one in a Document object.
- let title = schema.get_field("title").unwrap();
- let body = schema.get_field("body").unwrap();
-
- let mut old_man_doc = Document::default();
- old_man_doc.add_text(title, "The Old Man and the Sea");
- old_man_doc.add_text(
- body,
- "He was an old man who fished alone in a skiff in the Gulf Stream and \
- he had gone eighty-four days now without taking a fish.",
- );
-
- // ... and add it to the `IndexWriter`.
- index_writer.add_document(old_man_doc);
-
- // ### Create a document directly from json.
- //
- // Alternatively, we can use our schema to parse a
- // document object directly from json.
- // The document is a string, but we use the `json` macro
- // from `serde_json` for the convenience of multi-line support.
- let json = json!({
+ // # Defining the schema
+ //
+ // The Tantivy index requires a very strict schema.
+ // The schema declares which fields are in the index,
+ // and for each field, its type and "the way it should
+ // be indexed".
+
+ // first we need to define a schema ...
+ let mut schema_builder = SchemaBuilder::default();
+
+ // Our first field is title.
+ // In this example we want to use NGram searching.
+ // We will set the gram size to 3 characters, so any
+ // three-character sequence in the title should be findable.
+ let text_field_indexing = TextFieldIndexing::default()
+ .set_tokenizer("ngram3")
+ .set_index_option(IndexRecordOption::WithFreqsAndPositions);
+ let text_options = TextOptions::default()
+ .set_indexing_options(text_field_indexing)
+ .set_stored();
+ schema_builder.add_text_field("title", text_options);
+
+ // Our second field is body.
+ // We want full-text search for it, but we do not
+ // need to be able to retrieve it
+ // for our application.
+ //
+ // We can make our index lighter
+ // by omitting the `STORED` flag.
+ schema_builder.add_text_field("body", TEXT);
+
+ let schema = schema_builder.build();
+
+ // # Indexing documents
+ //
+ // Let's create a brand new index.
+ //
+ // This will actually just save a meta.json
+ // with our schema in the directory.
+ let index = Index::create(index_path, schema.clone())?;
+
+ // Here we are registering our custom tokenizer;
+ // it will produce tokens of 3 characters each.
+ index
+ .tokenizers()
+ .register("ngram3", NgramTokenizer::new(3, 3, false));
+
+ // To insert documents we need an index writer.
+ // There must be only one writer at a time.
+ // This single `IndexWriter` is already
+ // multithreaded.
+ //
+ // Here we use a buffer of 50MB per thread. Using a bigger
+ // heap for the indexer can increase its throughput.
+ let mut index_writer = index.writer(50_000_000)?;
+
+ // Let's index our documents!
+ // We first need a handle on the title and the body field.
+
+ // ### Create a document "manually".
+ //
+ // We can create a document manually, by setting the fields
+ // one by one in a Document object.
+ let title = schema.get_field("title").unwrap();
+ let body = schema.get_field("body").unwrap();
+
+ let mut old_man_doc = Document::default();
+ old_man_doc.add_text(title, "The Old Man and the Sea");
+ old_man_doc.add_text(
+ body,
+ "He was an old man who fished alone in a skiff in the Gulf Stream and \
+ he had gone eighty-four days now without taking a fish.",
+ );
+
+ // ... and add it to the `IndexWriter`.
+ index_writer.add_document(old_man_doc);
+
+ // ### Create a document directly from json.
+ //
+ // Alternatively, we can use our schema to parse a
+ // document object directly from json.
+ // The document is a string, but we use the `json` macro
+ // from `serde_json` for the convenience of multi-line support.
+ let json = json!({
"title": "Of Mice and Men",
"body": "A few miles south of Soledad, the Salinas River drops in close to the hillside \
bank and runs deep and green. The water is warm too, for it has slipped twinkling \
@@ -116,111 +116,111 @@ fn run_example(index_path: &Path) -> tantivy::Result<()> {
debris of the winter’s flooding; and sycamores with mottled, white, recumbent \
limbs and branches that arch over the pool"
});
- let mice_and_men_doc = schema.parse_document(&json.to_string())?;
+ let mice_and_men_doc = schema.parse_document(&json.to_string())?;
- index_writer.add_document(mice_and_men_doc);
+ index_writer.add_document(mice_and_men_doc);
- // Multi-valued field are allowed, they are
- // expressed in JSON by an array.
- // The following document has two titles.
- let json = json!({
+ // Multi-valued fields are allowed; they are
+ // expressed in JSON by an array.
+ // The following document has two titles.
+ let json = json!({
"title": ["Frankenstein", "The Modern Prometheus"],
"body": "You will rejoice to hear that no disaster has accompanied the commencement of an \
enterprise which you have regarded with such evil forebodings. I arrived here \
yesterday, and my first task is to assure my dear sister of my welfare and \
increasing confidence in the success of my undertaking."
});
- let frankenstein_doc = schema.parse_document(&json.to_string())?;
-
- index_writer.add_document(frankenstein_doc);
-
- // This is an example, so we will only index 3 documents
- // here. You can check out tantivy's tutorial to index
- // the English wikipedia. Tantivy's indexing is rather fast.
- // Indexing 5 million articles of the English wikipedia takes
- // around 4 minutes on my computer!
-
- // ### Committing
- //
- // At this point our documents are not searchable.
- //
- //
- // We need to call .commit() explicitly to force the
- // index_writer to finish processing the documents in the queue,
- // flush the current index to the disk, and advertise
- // the existence of new documents.
- //
- // This call is blocking.
- index_writer.commit()?;
-
- // If `.commit()` returns correctly, then all of the
- // documents that have been added are guaranteed to be
- // persistently indexed.
- //
- // In the scenario of a crash or a power failure,
- // tantivy behaves as if has rolled back to its last
- // commit.
-
- // # Searching
- //
- // Let's search our index. Start by reloading
- // searchers in the index. This should be done
- // after every commit().
- index.load_searchers()?;
-
- // Afterwards create one (or more) searchers.
- //
- // You should create a searcher
- // every time you start a "search query".
- let searcher = index.searcher();
-
- // The query parser can interpret human queries.
- // Here, if the user does not specify which
- // field they want to search, tantivy will search
- // in both title and body.
- let query_parser = QueryParser::for_index(&index, vec![title, body]);
-
- // here we want to get a hit on the 'ken' in Frankenstein
- let query = query_parser.parse_query("ken")?;
-
- // A query defines a set of documents, as
- // well as the way they should be scored.
- //
- // A query created by the query parser is scored according
- // to a metric called Tf-Idf, and will consider
- // any document matching at least one of our terms.
-
- // ### Collectors
- //
- // We are not interested in all of the documents but
- // only in the top 10. Keeping track of our top 10 best documents
- // is the role of the TopCollector.
- let mut top_collector = TopCollector::with_limit(10);
-
- // We can now perform our query.
- searcher.search(&*query, &mut top_collector)?;
-
- // Our top collector now contains the 10
- // most relevant doc ids...
- let doc_addresses = top_collector.docs();
-
- // The actual documents still need to be
- // retrieved from Tantivy's store.
- //
- // Since the body field was not configured as stored,
- // the document returned will only contain
- // a title.
-
- for doc_address in doc_addresses {
- let retrieved_doc = searcher.doc(&doc_address)?;
- println!("{}", schema.to_json(&retrieved_doc));
- }
-
- // Wait for indexing and merging threads to shut down.
- // Usually this isn't needed, but in `main` we try to
- // delete the temporary directory and that fails on
- // Windows if the files are still open.
- index_writer.wait_merging_threads()?;
-
- Ok(())
+ let frankenstein_doc = schema.parse_document(&json.to_string())?;
+
+ index_writer.add_document(frankenstein_doc);
+
+ // This is an example, so we will only index 3 documents
+ // here. You can check out tantivy's tutorial to index
+ // the English wikipedia. Tantivy's indexing is rather fast.
+ // Indexing 5 million articles of the English wikipedia takes
+ // around 4 minutes on my computer!
+
+ // ### Committing
+ //
+ // At this point our documents are not searchable.
+ //
+ // We need to call .commit() explicitly to force the
+ // index_writer to finish processing the documents in the queue,
+ // flush the current index to the disk, and advertise
+ // the existence of new documents.
+ //
+ // This call is blocking.
+ index_writer.commit()?;
+
+ // If `.commit()` returns correctly, then all of the
+ // documents that have been added are guaranteed to be
+ // persistently indexed.
+ //
+ // In the scenario of a crash or a power failure,
+ // tantivy behaves as if it had rolled back to its last
+ // commit.
+
+ // # Searching
+ //
+ // Let's search our index. Start by reloading
+ // searchers in the index. This should be done
+ // after every commit().
+ index.load_searchers()?;
+
+ // Afterwards create one (or more) searchers.
+ //
+ // You should create a searcher
+ // every time you start a "search query".
+ let searcher = index.searcher();
+
+ // The query parser can interpret human queries.
+ // Here, if the user does not specify which
+ // field they want to search, tantivy will search
+ // in both title and body.
+ let query_parser = QueryParser::for_index(&index, vec![title, body]);
+
+ // here we want to get a hit on the 'ken' in Frankenstein
+ let query = query_parser.parse_query("ken")?;
+
+ // A query defines a set of documents, as
+ // well as the way they should be scored.
+ //
+ // A query created by the query parser is scored according
+ // to a metric called Tf-Idf, and will consider
+ // any document matching at least one of our terms.
+
+ // ### Collectors
+ //
+ // We are not interested in all of the documents but
+ // only in the top 10. Keeping track of our top 10 best documents
+ // is the role of the TopCollector.
+ let mut top_collector = TopCollector::with_limit(10);
+
+ // We can now perform our query.
+ searcher.search(&*query, &mut top_collector)?;
+
+ // Our top collector now contains the 10
+ // most relevant doc ids...
+ let doc_addresses = top_collector.docs();
+
+ // The actual documents still need to be
+ // retrieved from Tantivy's store.
+ //
+ // Since the body field was not configured as stored,
+ // the document returned will only contain
+ // a title.
+
+ for doc_address in doc_addresses {
+ let retrieved_doc = searcher.doc(&doc_address)?;
+ println!("{}", schema.to_json(&retrieved_doc));
+ }
+
+ // Wait for indexing and merging threads to shut down.
+ // Usually this isn't needed, but in `main` we try to
+ // delete the temporary directory and that fails on
+ // Windows if the files are still open.
+ index_writer.wait_merging_threads()?;
+
+ Ok(())
}
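
For context on the example reformatted above: it registers an "ngram3" tokenizer and later queries for "ken", expecting a hit on "Frankenstein". The minimal sketch below is separate from this commit and illustrates why that query matches, assuming the tokenizer API of this tantivy era (NgramTokenizer::new(3, 3, false), Tokenizer::token_stream, TokenStream::advance/token); the exact trait names and signatures should be checked against the version actually in use.

// Sketch only, not part of this commit: print the 3-grams that the `ngram3`
// tokenizer from examples/custom_tokenizer.rs would index for one title.
// Assumed API for this era of tantivy: NgramTokenizer::new(min, max, prefix_only),
// Tokenizer::token_stream(), TokenStream::advance()/token().
extern crate tantivy;

use tantivy::tokenizer::{NgramTokenizer, TokenStream, Tokenizer};

fn main() {
    let tokenizer = NgramTokenizer::new(3, 3, false);
    let mut stream = tokenizer.token_stream("Frankenstein");
    // Expected grams: Fra, ran, ank, nke, ken, ens, nst, ste, tei, ein.
    // The example's query for "ken" matches because "ken" is one of them.
    while stream.advance() {
        println!("{}", stream.token().text);
    }
}
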
diff --git a/src/collector/mod.rs b/src/collector/mod.rs
index 9c0d3ac..17ca931 100644
--- a/src/collector/mod.rs
+++ b/src/collector/mod.rs
@@ -90,12 +90,12 @@ pub mod tests {
use super::*;
use core::SegmentReader;
+ use fastfield::BytesFastFieldReader;
use fastfield::FastFieldReader;
use schema::Field;
use DocId;
use Score;
use SegmentLocalId;
- use fastfield::BytesFastFieldReader;
/// Stores all of the doc ids.
/// This collector is only used for tests.
diff --git a/src/compression/mod.rs b/src/compression/mod.rs
index 5261fea..0e6a189 100644
--- a/src/compression/mod.rs
+++ b/src/compression/mod.rs
@@ -274,10 +274,10 @@ pub mod tests {
mod bench {
use super::*;
- use test::Bencher;
- use rand::XorShiftRng;
- use rand::SeedableRng;
use rand::Rng;
+ use rand::SeedableRng;
+ use rand::XorShiftRng;
+ use test::Bencher;
fn generate_array_with_seed(n: usize, ratio: f32, seed_val: u32) -> Vec<u32> {
let seed: &[u32; 4] = &[1, 2, 3, seed_val];
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index 6c9d331..5482c8a 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -122,7 +122,7 @@ impl SegmentReader {
pub(crate) fn fast_field_reader_with_idx<Item: FastValue>(
&self,
field: Field,
- idx: usize
+ idx: usize,
) -> fastfield::Result<FastFieldReader<Item>> {
if let Some(ff_source) = self.fast_fields_composite.open_read_with_idx(field, idx) {
Ok(FastFieldReader::open(ff_source))
@@ -153,7 +153,7 @@ impl SegmentReader {
pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result<BytesFastFieldReader> {
let field_entry = self.schema.get_field_entry(field);
match field_entry.field_type() {
- &FieldType::Bytes => {},
+ &FieldType::Bytes => {}
_ => return Err(FastFieldNotAvailableError::new(field_entry)),
}
let idx_reader = self.fast_fields_composite
diff --git a/src/datastruct/stacker/hashmap.rs b/src/datastruct/stacker/hashmap.rs
index 36c0b13..f872f31 100644
--- a/src/datastruct/stacker/hashmap.rs
+++ b/src/datastruct/stacker/hashmap.rs
@@ -4,7 +4,6 @@ use std::iter;
use std::mem;
use std::slice;
-
mod murmurhash2 {
const SEED: u32 = 3_242_157_231u32;
@@ -12,7 +11,6 @@ mod murmurhash2 {
#[inline(always)]
pub fn murmurhash2(key: &[u8]) -> u32 {
-
let mut key_ptr: *const u32 = key.as_ptr() as *const u32;
let len = key.len() as u32;
let mut h: u32 = SEED ^ len;
@@ -54,7 +52,6 @@ mod murmurhash2 {
}
}
-
/// Split the thread memory budget into
/// - the heap size
/// - the hash table "table" itself.
@@ -217,14 +214,14 @@ impl<'a> TermHashMap<'a> {
}
}
-#[cfg(all(test, feature="unstable"))]
+#[cfg(all(test, feature = "unstable"))]
mod bench {
use super::murmurhash2::murmurhash2;
use test::Bencher;
#[bench]
fn bench_murmurhash2(b: &mut Bencher) {
- let keys: [&'static str; 3]= ["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "];
+ let keys: [&'static str; 3] = ["wer qwe qwe qwe ", "werbq weqweqwe2 ", "weraq weqweqwe3 "];
b.iter(|| {
let mut s = 0;
for &key in &keys {
@@ -324,7 +321,6 @@ mod tests {
assert_eq!(murmurhash2("abcdefghijklmnop".as_bytes()), 2350868870);
}
-
#[test]
fn test_murmur_collisions() {
let mut set: HashSet<u32> = HashSet::default();
diff --git a/src/fastfield/bytes/mod.rs b/src/fastfield/bytes/mod.rs
index add4e6b..48c021f 100644
--- a/src/fastfield/bytes/mod.rs
+++ b/src/fastfield/bytes/mod.rs
@@ -35,4 +35,4 @@ mod tests {
let long = vec![0u8; 1000];
assert_eq!(bytes_reader.get_val(4), long.as_slice());
}
-}
\ No newline at end of file
+}
diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs
index 8db65e7..9e4c879 100644
--- a/src/fastfield/bytes/reader.rs
+++ b/src/fastfield/bytes/reader.rs
@@ -25,10 +25,7 @@ impl BytesFastFieldReader {
values_source: ReadOnlySource,
) -> BytesFastFieldReader {
let values = OwningRef::new(values_source).map(|source| &source[..]);
- BytesFastFieldReader {
- idx_reader,
- values,
- }
+ BytesFastFieldReader { idx_reader, values }
}
/// Returns the bytes associated to the given `doc`
@@ -37,4 +34,4 @@ impl BytesFastFieldReader {
let stop = self.idx_reader.get(doc + 1) as usize;
&self.values[start..stop]
}
-}
\ No newline at end of file
+}
diff --git a/src/fastfield/bytes/writer.rs b/src/fastfield/bytes/writer.rs
index 16aa330..568a542 100644
--- a/src/fastfield/bytes/writer.rs
+++ b/src/fastfield/bytes/writer.rs
@@ -1,4 +1,3 @@
-
use std::io;
use fastfield::serializer::FastFieldSerializer;
@@ -55,7 +54,10 @@ impl BytesFastFieldWriter {
if let &Value::Bytes(ref bytes) = field_value.value() {
self.vals.extend_from_slice(bytes);
} else {
- panic!("Bytes field contained non-Bytes Value!. Field {:?} = {:?}", self.field, field_value);
+ panic!(
+ "Bytes field contained non-Bytes Value!. Field {:?} = {:?}",
+ self.field, field_value
+ );
}
}
}
@@ -73,10 +75,7 @@ impl BytesFastFieldWriter {
}
/// Serializes the fast field values by pushing them to the `FastFieldSerializer`.
- pub fn serialize(
- &self,
- serializer: &mut FastFieldSerializer
- ) -> io::Result<()> {
+ pub fn serialize(&self, serializer: &mut FastFieldSerializer) -> io::Result<()> {
{
// writing the offset index
let mut doc_index_serializer =
@@ -94,4 +93,4 @@ impl BytesFastFieldWriter {
}
Ok(())
}
-}
\ No newline at end of file
+}
diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs
index ba14c52..265d325 100644
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -23,6 +23,7 @@ values stored.
Read access performance is comparable to that of an array lookup.
*/
+pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
pub use self::delete::write_delete_bitset;
pub use self::delete::DeleteBitSet;
pub use self::error::{FastFieldNotAvailableError, Result};
@@ -31,7 +32,6 @@ pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastField
pub use self::reader::FastFieldReader;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
-pub use self::bytes::{BytesFastFieldReader, BytesFastFieldWriter};
use common;
use schema::Cardinality;
use schema::FieldType;
diff --git a/src/fastfield/multivalued/writer.rs b/src/fastfield/multivalued/writer.rs
index 9991980..b872fa1 100644
--- a/src/fastfield/multivalued/writer.rs
+++ b/src/fastfield/multivalued/writer.rs
@@ -3,10 +3,10 @@ use fastfield::value_to_u64;
use fastfield::FastFieldSerializer;
use itertools::Itertools;
use postings::UnorderedTermId;
-use termdict::TermOrdinal;
use schema::{Document, Field};
use std::collections::HashMap;
use std::io;
+use termdict::TermOrdinal;
use DocId;
/// Writer for multi-valued (as in, more than one value per document)
diff --git a/src/fastfield/serializer.rs b/src/fastfield/serializer.rs
index 287fd76..ef781c2 100644
--- a/src/fastfield/serializer.rs
+++ b/src/fastfield/serializer.rs
@@ -65,7 +65,7 @@ impl FastFieldSerializer {
pub fn new_bytes_fast_field_with_idx(
&mut self,
field: Field,
- idx: usize
+ idx: usize,
) -> io::Result<FastBytesFieldSerializer<CountingWriter<WritePtr>>> {
let field_write = self.composite_write.for_field_with_idx(field, idx);
FastBytesFieldSerializer::open(field_write)
@@ -87,7 +87,6 @@ pub struct FastSingleFieldSerializer<'a, W: Write + 'a> {
}
impl<'a, W: Write> FastSingleFieldSerializer<'a, W> {
-
/// Creates a new fast field serializer.
///
/// The serializer in fact encode the values by bitpacking
@@ -144,4 +143,4 @@ impl<'a, W: Write> FastBytesFieldSerializer<'a, W> {
pub fn flush(&mut self) -> io::Result<()> {
self.write.flush()
}
-}
\ No newline at end of file
+}
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index c7aa081..1c3d11e 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -1,12 +1,12 @@
+use super::multivalued::MultiValueIntFastFieldWriter;
use common;
use common::BinarySerializable;
use common::VInt;
use fastfield::{BytesFastFieldWriter, FastFieldSerializer};
use postings::UnorderedTermId;
-use schema::{FieldType, Cardinality, Document, Field, Schema};
+use schema::{Cardinality, Document, Field, FieldType, Schema};
use std::collections::HashMap;
use std::io;
-use super::multivalued::MultiValueIntFastFieldWriter;
use termdict::TermOrdinal;
/// The fastfieldswriter regroup all of the fast field writers.
@@ -89,10 +89,7 @@ impl FastFieldsWriter {
///
/// Returns None if the field does not exist, or is not
/// configured as a bytes fastfield in the schema.
- pub fn get_bytes_writer(
- &mut self,
- field: Field,
- ) -> Option<&mut BytesFastFieldWriter> {
+ pub fn get_bytes_writer(&mut self, field: Field) -> Option<&mut BytesFastFieldWriter> {
// TODO optimize
self.bytes_value_writers
.iter_mut()
diff --git a/src/fieldnorm/writer.rs b/src/fieldnorm/writer.rs
index e0b21fe..c36ea99 100644
--- a/src/fieldnorm/writer.rs
+++ b/src/fieldnorm/writer.rs
@@ -17,8 +17,7 @@ impl FieldNormsWriter {
.fields()
.iter()
.enumerate()
- .filter(|&(_, field_entry)|
- field_entry.is_indexed())
+ .filter(|&(_, field_entry)| field_entry.is_indexed())
.map(|(field, _)| Field(field as u32))
.collect::<Vec<Field>>()
}
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs
index 5d58e50..0e59434 100644
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -6,6 +6,7 @@ use error::Result;
use fastfield::DeleteBitSet;
use fastfield::FastFieldReader;
use fastfield::FastFieldSerializer;
+use fastfield::MultiValueIntFastFieldReader;
use fieldnorm::FieldNormReader;
use fieldnorm::FieldNormsSerializer;
use fieldnorm::FieldNormsWriter;
@@ -13,16 +14,15 @@ use indexer::SegmentSerializer;
use itertools::Itertools;
use postings::InvertedIndexSerializer;
use postings::Postings;
+use schema::Cardinality;
+use schema::FieldType;
use schema::{Field, Schema};
+use std::cmp;
+use std::collections::HashMap;
use store::StoreWriter;
use termdict::TermMerger;
-use DocId;
-use schema::FieldType;
use termdict::TermOrdinal;
-use schema::Cardinality;
-use std::collections::HashMap;
-use fastfield::MultiValueIntFastFieldReader;
-use std::cmp;
+use DocId;
fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
let mut total_tokens = 0u64;
@@ -53,8 +53,6 @@ fn compute_total_num_tokens(readers: &[SegmentReader], field: Field) -> u64 {
.sum::<u64>()
}
-
-
pub struct IndexMerger {
schema: Schema,
readers: Vec<SegmentReader>,
@@ -89,7 +87,7 @@ fn compute_min_max_val(
}
struct TermOrdinalMapping {
- per_segment_new_term_ordinals: Vec<Vec<TermOrdinal>>
+ per_segment_new_term_ordinals: Vec<Vec<TermOrdinal>>,
}
impl TermOrdinalMapping {
@@ -98,14 +96,11 @@ impl TermOrdinalMapping {
per_segment_new_term_ordinals: max_term_ords
.into_iter()
.map(|max_term_ord| vec![TermOrdinal::default(); max_term_ord as usize])
- .collect()
+ .collect(),
}
}
- fn register_from_to(&mut self,
- segment_ord: usize,
- from_ord: TermOrdinal,
- to_ord: TermOrdinal) {
+ fn register_from_to(&mut self, segment_ord: usize, from_ord: TermOrdinal, to_ord: TermOrdinal) {
self.per_segment_new_term_ordinals[segment_ord][from_ord as usize] = to_ord;
}
@@ -116,9 +111,7 @@ impl TermOrdinalMapping {
fn max_term_ord(&self) -> TermOrdinal {
self.per_segment_new_term_ordinals
.iter()
- .flat_map(|term_ordinals| {
- term_ordinals.iter().cloned().max()
- })
+ .flat_map(|term_ordinals| term_ordinals.iter().cloned().max())
.max()
.unwrap_or(TermOrdinal::default())
}
@@ -185,9 +178,11 @@ impl IndexMerger {
Ok(())
}
- fn write_fast_fields(&self,
- fast_field_serializer: &mut FastFieldSerializer,
- mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>) -> Result<()> {
+ fn write_fast_fields(
+ &self,
+ fast_field_serializer: &mut FastFieldSerializer,
+ mut term_ord_mappings: HashMap<Field, TermOrdinalMapping>,
+ ) -> Result<()> {
for (field_id, field_entry) in self.schema.fields().iter().enumerate() {
let field = Field(field_id as u32);
let field_type = field_entry.field_type();
@@ -198,17 +193,15 @@ impl IndexMerger {
.expect("Logic Error in Tantivy (Please report). HierarchicalFact field should have required a\
`term_ordinal_mapping`.");
self.write_hierarchical_facet_field(
- field,
- term_ordinal_mapping,
- fast_field_serializer)?;
+ field,
+ term_ordinal_mapping,
+ fast_field_serializer,
+ )?;
}
FieldType::U64(ref options) | FieldType::I64(ref options) => {
match options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
- self.write_single_fast_field(
- field,
- fast_field_serializer
- )?;
+ self.write_single_fast_field(field, fast_field_serializer)?;
}
Some(Cardinality::MultiValues) => {
self.write_multi_fast_field(field, fast_field_serializer)?;
@@ -229,34 +222,25 @@ impl IndexMerger {
Ok(())
}
-