From 66b4615e4e6ab66dfb67fb3d4fcbd68fb3051b5b Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Sun, 5 May 2019 13:52:43 +0900 Subject: Issue/542 (#543) * Closes 542. Fast fields are all loaded when the segment reader is created. --- src/collector/facet_collector.rs | 6 +- src/collector/multi_collector.rs | 6 +- src/collector/tests.rs | 17 ++- src/collector/top_collector.rs | 4 +- src/collector/top_field_collector.rs | 28 +++-- src/core/index.rs | 2 +- src/core/searcher.rs | 4 +- src/core/segment_reader.rs | 105 +++--------------- src/directory/ram_directory.rs | 2 +- src/fastfield/bytes/mod.rs | 14 +-- src/fastfield/bytes/reader.rs | 15 ++- src/fastfield/delete.rs | 20 ++-- src/fastfield/mod.rs | 10 +- src/fastfield/multivalued/mod.rs | 8 +- src/fastfield/multivalued/reader.rs | 42 ++++--- src/fastfield/reader.rs | 9 ++ src/fastfield/readers.rs | 191 ++++++++++++++++++++++++++++++++ src/indexer/log_merge_policy.rs | 2 +- src/indexer/merger.rs | 120 ++++++++++++++------ src/indexer/segment_register.rs | 2 +- src/indexer/segment_writer.rs | 3 +- src/lib.rs | 24 ++-- src/postings/serializer.rs | 2 +- src/query/phrase_query/phrase_query.rs | 3 +- src/query/query_parser/query_grammar.rs | 7 +- src/query/union.rs | 2 +- src/schema/document.rs | 4 +- src/schema/field_type.rs | 3 +- src/schema/schema.rs | 2 +- src/snippet/mod.rs | 3 +- src/space_usage/mod.rs | 2 +- src/tokenizer/tokenizer_manager.rs | 4 +- 32 files changed, 450 insertions(+), 216 deletions(-) create mode 100644 src/fastfield/readers.rs (limited to 'src') diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 16ce942..f86cc94 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -17,6 +17,7 @@ use Result; use Score; use SegmentLocalId; use SegmentReader; +use TantivyError; struct Hit<'a> { count: u64, @@ -264,7 +265,10 @@ impl Collector for FacetCollector { _: SegmentLocalId, reader: &SegmentReader, ) -> Result { - let facet_reader = reader.facet_reader(self.field)?; + let field_name = reader.schema().get_field_name(self.field); + let facet_reader = reader.facet_reader(self.field).ok_or_else(|| { + TantivyError::SchemaError(format!("Field {:?} is not a facet field.", field_name)) + })?; let mut collapse_mapping = Vec::new(); let mut counts = Vec::new(); diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs index cc3bfc4..43d5dde 100644 --- a/src/collector/multi_collector.rs +++ b/src/collector/multi_collector.rs @@ -2,6 +2,7 @@ use super::Collector; use super::SegmentCollector; use collector::Fruit; use std::marker::PhantomData; +use std::ops::Deref; use DocId; use Result; use Score; @@ -199,7 +200,10 @@ impl<'a> Collector for MultiCollector<'a> { } fn requires_scoring(&self) -> bool { - self.collector_wrappers.iter().any(|c| c.requires_scoring()) + self.collector_wrappers + .iter() + .map(Deref::deref) + .any(Collector::requires_scoring) } fn merge_fruits(&self, segments_multifruits: Vec) -> Result { diff --git a/src/collector/tests.rs b/src/collector/tests.rs index cc8bcff..424ceb3 100644 --- a/src/collector/tests.rs +++ b/src/collector/tests.rs @@ -114,11 +114,15 @@ impl Collector for FastFieldTestCollector { fn for_segment( &self, _: SegmentLocalId, - reader: &SegmentReader, + segment_reader: &SegmentReader, ) -> Result { + let reader = segment_reader + .fast_fields() + .u64(self.field) + .expect("Requested field is not a fast field."); Ok(FastFieldSegmentCollector { vals: Vec::new(), - reader: reader.fast_field_reader(self.field)?, + 
reader, }) } @@ -170,11 +174,14 @@ impl Collector for BytesFastFieldTestCollector { fn for_segment( &self, _segment_local_id: u32, - segment: &SegmentReader, + segment_reader: &SegmentReader, ) -> Result { Ok(BytesFastFieldSegmentCollector { vals: Vec::new(), - reader: segment.bytes_fast_field_reader(self.field)?, + reader: segment_reader + .fast_fields() + .bytes(self.field) + .expect("Field is not a bytes fast field."), }) } @@ -191,7 +198,7 @@ impl SegmentCollector for BytesFastFieldSegmentCollector { type Fruit = Vec; fn collect(&mut self, doc: u32, _score: f32) { - let data = self.reader.get_val(doc); + let data = self.reader.get_bytes(doc); self.vals.extend(data); } diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs index b17a7d6..880df69 100644 --- a/src/collector/top_collector.rs +++ b/src/collector/top_collector.rs @@ -98,11 +98,11 @@ where .collect()) } - pub(crate) fn for_segment( + pub(crate) fn for_segment( &self, segment_id: SegmentLocalId, _: &SegmentReader, - ) -> Result> { + ) -> Result> { Ok(TopSegmentCollector::new(segment_id, self.limit)) } } diff --git a/src/collector/top_field_collector.rs b/src/collector/top_field_collector.rs index b1a2d5e..b7cf7c9 100644 --- a/src/collector/top_field_collector.rs +++ b/src/collector/top_field_collector.rs @@ -5,10 +5,12 @@ use collector::SegmentCollector; use fastfield::FastFieldReader; use fastfield::FastValue; use schema::Field; +use std::marker::PhantomData; use DocAddress; use Result; use SegmentLocalId; use SegmentReader; +use TantivyError; /// The Top Field Collector keeps track of the K documents /// sorted by a fast field in the index @@ -106,8 +108,15 @@ impl Collector for TopDocsByF reader: &SegmentReader, ) -> Result> { let collector = self.collector.for_segment(segment_local_id, reader)?; - let reader = reader.fast_field_reader(self.field)?; - Ok(TopFieldSegmentCollector { collector, reader }) + let reader = reader.fast_fields().u64(self.field).ok_or_else(|| { + let field_name = reader.schema().get_field_name(self.field); + TantivyError::SchemaError(format!("Failed to find fast field reader {:?}", field_name)) + })?; + Ok(TopFieldSegmentCollector { + collector, + reader, + _type: PhantomData, + }) } fn requires_scoring(&self) -> bool { @@ -122,9 +131,10 @@ impl Collector for TopDocsByF } } -pub struct TopFieldSegmentCollector { - collector: TopSegmentCollector, - reader: FastFieldReader, +pub struct TopFieldSegmentCollector { + collector: TopSegmentCollector, + reader: FastFieldReader, + _type: PhantomData, } impl SegmentCollector @@ -138,7 +148,11 @@ impl SegmentCollector } fn harvest(self) -> Vec<(T, DocAddress)> { - self.collector.harvest() + self.collector + .harvest() + .into_iter() + .map(|(val, doc_address)| (T::from_u64(val), doc_address)) + .collect() } } @@ -235,7 +249,7 @@ mod tests { .for_segment(0, segment) .map(|_| ()) .unwrap_err(), - TantivyError::FastFieldError(_) + TantivyError::SchemaError(_) ); } diff --git a/src/core/index.rs b/src/core/index.rs index 8e1709c..8b486e5 100644 --- a/src/core/index.rs +++ b/src/core/index.rs @@ -340,7 +340,7 @@ impl Index { Ok(self .searchable_segment_metas()? 
.iter() - .map(|segment_meta| segment_meta.id()) + .map(SegmentMeta::id) .collect()) } } diff --git a/src/core/searcher.rs b/src/core/searcher.rs index 9e74fdd..3c0c745 100644 --- a/src/core/searcher.rs +++ b/src/core/searcher.rs @@ -59,7 +59,7 @@ impl Searcher { ) -> Searcher { let store_readers = segment_readers .iter() - .map(|segment_reader| segment_reader.get_store_reader()) + .map(SegmentReader::get_store_reader) .collect(); Searcher { schema, @@ -218,7 +218,7 @@ impl fmt::Debug for Searcher { let segment_ids = self .segment_readers .iter() - .map(|segment_reader| segment_reader.segment_id()) + .map(SegmentReader::segment_id) .collect::>(); write!(f, "Searcher({:?})", segment_ids) } diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs index a8ac2b0..2dbb5ad 100644 --- a/src/core/segment_reader.rs +++ b/src/core/segment_reader.rs @@ -5,14 +5,10 @@ use core::Segment; use core::SegmentComponent; use core::SegmentId; use directory::ReadOnlySource; -use error::TantivyError; use fastfield::DeleteBitSet; use fastfield::FacetReader; -use fastfield::FastFieldReader; -use fastfield::{self, FastFieldNotAvailableError}; -use fastfield::{BytesFastFieldReader, FastValue, MultiValueIntFastFieldReader}; +use fastfield::FastFieldReaders; use fieldnorm::FieldNormReader; -use schema::Cardinality; use schema::Field; use schema::FieldType; use schema::Schema; @@ -51,7 +47,7 @@ pub struct SegmentReader { postings_composite: CompositeFile, positions_composite: CompositeFile, positions_idx_composite: CompositeFile, - fast_fields_composite: CompositeFile, + fast_fields_readers: Arc, fieldnorms_composite: CompositeFile, store_source: ReadOnlySource, @@ -105,93 +101,21 @@ impl SegmentReader { /// /// # Panics /// May panic if the index is corrupted. - pub fn fast_field_reader( - &self, - field: Field, - ) -> fastfield::Result> { - let field_entry = self.schema.get_field_entry(field); - if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue) - { - self.fast_fields_composite - .open_read(field) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(FastFieldReader::open) - } else { - Err(FastFieldNotAvailableError::new(field_entry)) - } - } - - pub(crate) fn fast_field_reader_with_idx( - &self, - field: Field, - idx: usize, - ) -> fastfield::Result> { - if let Some(ff_source) = self.fast_fields_composite.open_read_with_idx(field, idx) { - Ok(FastFieldReader::open(ff_source)) - } else { - let field_entry = self.schema.get_field_entry(field); - Err(FastFieldNotAvailableError::new(field_entry)) - } - } - - /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`. - /// May panick if the field is not a multivalued fastfield of the type `Item`. - pub fn multi_fast_field_reader( - &self, - field: Field, - ) -> fastfield::Result> { - let field_entry = self.schema.get_field_entry(field); - if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues) - { - let idx_reader = self.fast_field_reader_with_idx(field, 0)?; - let vals_reader = self.fast_field_reader_with_idx(field, 1)?; - Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader)) - } else { - Err(FastFieldNotAvailableError::new(field_entry)) - } - } - - /// Accessor to the `BytesFastFieldReader` associated to a given `Field`. 
- pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result { - let field_entry = self.schema.get_field_entry(field); - match *field_entry.field_type() { - FieldType::Bytes => {} - _ => return Err(FastFieldNotAvailableError::new(field_entry)), - } - let idx_reader = self - .fast_fields_composite - .open_read_with_idx(field, 0) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) - .map(FastFieldReader::open)?; - let values = self - .fast_fields_composite - .open_read_with_idx(field, 1) - .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?; - Ok(BytesFastFieldReader::open(idx_reader, values)) + pub fn fast_fields(&self) -> &FastFieldReaders { + &self.fast_fields_readers } /// Accessor to the `FacetReader` associated to a given `Field`. - pub fn facet_reader(&self, field: Field) -> Result { + pub fn facet_reader(&self, field: Field) -> Option { let field_entry = self.schema.get_field_entry(field); if field_entry.field_type() != &FieldType::HierarchicalFacet { - return Err(TantivyError::InvalidArgument(format!( - "The field {:?} is not a \ - hierarchical facet.", - field_entry - ))); + return None; } - let term_ords_reader = self.multi_fast_field_reader(field)?; - let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| { - TantivyError::InvalidArgument(format!( - "The field \"{}\" is a hierarchical \ - but this segment does not seem to have the field term \ - dictionary.", - field_entry.name() - )) - })?; + let term_ords_reader = self.fast_fields().u64s(field)?; + let termdict_source = self.termdict_composite.open_read(field)?; let termdict = TermDictionary::from_source(&termdict_source); let facet_reader = FacetReader::new(term_ords_reader, termdict); - Ok(facet_reader) + Some(facet_reader) } /// Accessor to the segment's `Field norms`'s reader. 
@@ -247,8 +171,12 @@ impl SegmentReader { } }; + let schema = segment.schema(); + let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?; let fast_fields_composite = CompositeFile::open(&fast_fields_data)?; + let fast_field_readers = + Arc::new(FastFieldReaders::load_all(&schema, &fast_fields_composite)?); let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?; let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?; @@ -260,14 +188,13 @@ impl SegmentReader { None }; - let schema = segment.schema(); Ok(SegmentReader { inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())), max_doc: segment.meta().max_doc(), num_docs: segment.meta().num_docs(), termdict_composite, postings_composite, - fast_fields_composite, + fast_fields_readers: fast_field_readers, fieldnorms_composite, segment_id: segment.id(), store_source, @@ -381,12 +308,12 @@ impl SegmentReader { self.postings_composite.space_usage(), self.positions_composite.space_usage(), self.positions_idx_composite.space_usage(), - self.fast_fields_composite.space_usage(), + self.fast_fields_readers.space_usage(), self.fieldnorms_composite.space_usage(), self.get_store_reader().space_usage(), self.delete_bitset_opt .as_ref() - .map(|x| x.space_usage()) + .map(DeleteBitSet::space_usage) .unwrap_or(0), ) } diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs index 9851177..804763e 100644 --- a/src/directory/ram_directory.rs +++ b/src/directory/ram_directory.rs @@ -86,7 +86,7 @@ impl InnerDirectory { self.fs .get(path) .ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path))) - .map(|el| el.clone()) + .map(Clone::clone) } fn delete(&mut self, path: &Path) -> result::Result<(), DeleteError> { diff --git a/src/fastfield/bytes/mod.rs b/src/fastfield/bytes/mod.rs index b3e73a5..0106d3a 100644 --- a/src/fastfield/bytes/mod.rs +++ b/src/fastfield/bytes/mod.rs @@ -23,14 +23,14 @@ mod tests { index_writer.add_document(doc!(field=>vec![0u8; 1000])); assert!(index_writer.commit().is_ok()); let searcher = index.reader().unwrap().searcher(); - let reader = searcher.segment_reader(0); - let bytes_reader = reader.bytes_fast_field_reader(field).unwrap(); + let segment_reader = searcher.segment_reader(0); + let bytes_reader = segment_reader.fast_fields().bytes(field).unwrap(); - assert_eq!(bytes_reader.get_val(0), &[0u8, 1, 2, 3]); - assert!(bytes_reader.get_val(1).is_empty()); - assert_eq!(bytes_reader.get_val(2), &[255u8]); - assert_eq!(bytes_reader.get_val(3), &[1u8, 3, 5, 7, 9]); + assert_eq!(bytes_reader.get_bytes(0), &[0u8, 1, 2, 3]); + assert!(bytes_reader.get_bytes(1).is_empty()); + assert_eq!(bytes_reader.get_bytes(2), &[255u8]); + assert_eq!(bytes_reader.get_bytes(3), &[1u8, 3, 5, 7, 9]); let long = vec![0u8; 1000]; - assert_eq!(bytes_reader.get_val(4), long.as_slice()); + assert_eq!(bytes_reader.get_bytes(4), long.as_slice()); } } diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs index 9e4c879..11b6520 100644 --- a/src/fastfield/bytes/reader.rs +++ b/src/fastfield/bytes/reader.rs @@ -14,6 +14,7 @@ use DocId; /// /// Reading the value for a document is done by reading the start index for it, /// and the start index for the next document, and keeping the bytes in between. 
+#[derive(Clone)] pub struct BytesFastFieldReader { idx_reader: FastFieldReader, values: OwningRef, @@ -28,10 +29,20 @@ impl BytesFastFieldReader { BytesFastFieldReader { idx_reader, values } } - /// Returns the bytes associated to the given `doc` - pub fn get_val(&self, doc: DocId) -> &[u8] { + fn range(&self, doc: DocId) -> (usize, usize) { let start = self.idx_reader.get(doc) as usize; let stop = self.idx_reader.get(doc + 1) as usize; + (start, stop) + } + + /// Returns the bytes associated to the given `doc` + pub fn get_bytes(&self, doc: DocId) -> &[u8] { + let (start, stop) = self.range(doc); &self.values[start..stop] } + + /// Returns the overall number of bytes in this bytes fast field. + pub fn total_num_bytes(&self) -> usize { + self.values.len() + } } diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs index 76ff7e4..d77dcc0 100644 --- a/src/fastfield/delete.rs +++ b/src/fastfield/delete.rs @@ -53,16 +53,18 @@ impl DeleteBitSet { } } - /// Returns whether the document has been marked as deleted. + /// Returns true iff the document is still "alive". In other words, if it has not been deleted. + pub fn is_alive(&self, doc: DocId) -> bool { + !self.is_deleted(doc) + } + + /// Returns true iff the document has been marked as deleted. + #[inline(always)] pub fn is_deleted(&self, doc: DocId) -> bool { - if self.len == 0 { - false - } else { - let byte_offset = doc / 8u32; - let b: u8 = (*self.data)[byte_offset as usize]; - let shift = (doc & 7u32) as u8; - b & (1u8 << shift) != 0 - } + let byte_offset = doc / 8u32; + let b: u8 = (*self.data)[byte_offset as usize]; + let shift = (doc & 7u32) as u8; + b & (1u8 << shift) != 0 } /// Summarize total space usage of this bitset. diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index 7ddea8a..6e5c3b7 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -30,6 +30,7 @@ pub use self::error::{FastFieldNotAvailableError, Result}; pub use self::facet_reader::FacetReader; pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastFieldWriter}; pub use self::reader::FastFieldReader; +pub use self::readers::FastFieldReaders; pub use self::serializer::FastFieldSerializer; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; use common; @@ -43,6 +44,7 @@ mod error; mod facet_reader; mod multivalued; mod reader; +mod readers; mod serializer; mod writer; @@ -78,10 +80,6 @@ impl FastValue for u64 { *self } - fn as_u64(&self) -> u64 { - *self - } - fn fast_field_cardinality(field_type: &FieldType) -> Option { match *field_type { FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(), @@ -89,6 +87,10 @@ impl FastValue for u64 { _ => None, } } + + fn as_u64(&self) -> u64 { + *self + } } impl FastValue for i64 { diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 3e2a30e..ad23fb7 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -37,9 +37,7 @@ mod tests { let searcher = index.reader().unwrap().searcher(); let segment_reader = searcher.segment_reader(0); let mut vals = Vec::new(); - let multi_value_reader = segment_reader - .multi_fast_field_reader::(field) - .unwrap(); + let multi_value_reader = segment_reader.fast_fields().u64s(field).unwrap(); { multi_value_reader.get_vals(2, &mut vals); assert_eq!(&vals, &[4u64]); @@ -198,9 +196,9 @@ mod tests { assert!(index_writer.commit().is_ok()); let searcher = index.reader().unwrap().searcher(); - let reader = searcher.segment_reader(0); + let segment_reader = 
searcher.segment_reader(0); let mut vals = Vec::new(); - let multi_value_reader = reader.multi_fast_field_reader::(field).unwrap(); + let multi_value_reader = segment_reader.fast_fields().i64s(field).unwrap(); { multi_value_reader.get_vals(2, &mut vals); assert_eq!(&vals, &[-4i64]); diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index 3456de5..ee3c615 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -26,6 +26,13 @@ impl MultiValueIntFastFieldReader { } } + pub(crate) fn into_u64s_reader(self) -> MultiValueIntFastFieldReader { + MultiValueIntFastFieldReader { + idx_reader: self.idx_reader, + vals_reader: self.vals_reader.into_u64_reader(), + } + } + /// Returns `(start, stop)`, such that the values associated /// to the given document are `start..stop`. fn range(&self, doc: DocId) -> (u64, u64) { @@ -41,13 +48,24 @@ impl MultiValueIntFastFieldReader { vals.resize(len, Item::default()); self.vals_reader.get_range_u64(start, &mut vals[..]); } + + /// Returns the number of values associated with the document `DocId`. + pub fn num_vals(&self, doc: DocId) -> usize { + let (start, stop) = self.range(doc); + (stop - start) as usize + } + + /// Returns the overall number of values in this field . + pub fn total_num_vals(&self) -> u64 { + self.idx_reader.max_value() + } } #[cfg(test)] mod tests { use core::Index; - use schema::{Document, Facet, Schema}; + use schema::{Facet, Schema}; #[test] fn test_multifastfield_reader() { @@ -58,22 +76,12 @@ mod tests { let mut index_writer = index .writer_with_num_threads(1, 30_000_000) .expect("Failed to create index writer."); - { - let mut doc = Document::new(); - doc.add_facet(facet_field, "/category/cat2"); - doc.add_facet(facet_field, "/category/cat1"); - index_writer.add_document(doc); - } - { - let mut doc = Document::new(); - doc.add_facet(facet_field, "/category/cat2"); - index_writer.add_document(doc); - } - { - let mut doc = Document::new(); - doc.add_facet(facet_field, "/category/cat3"); - index_writer.add_document(doc); - } + index_writer.add_document(doc!( + facet_field => Facet::from("/category/cat2"), + facet_field => Facet::from("/category/cat1"), + )); + index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2"))); + index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3"))); index_writer.commit().expect("Commit failed"); let searcher = index.reader().unwrap().searcher(); let segment_reader = searcher.segment_reader(0); diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs index c279427..bba8c24 100644 --- a/src/fastfield/reader.rs +++ b/src/fastfield/reader.rs @@ -50,6 +50,15 @@ impl FastFieldReader { } } + pub(crate) fn into_u64_reader(self) -> FastFieldReader { + FastFieldReader { + bit_unpacker: self.bit_unpacker, + min_value_u64: self.min_value_u64, + max_value_u64: self.max_value_u64, + _phantom: PhantomData, + } + } + /// Return the value associated to the given document. /// /// This accessor should return as fast as possible. 
diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs new file mode 100644 index 0000000..47b4391 --- /dev/null +++ b/src/fastfield/readers.rs @@ -0,0 +1,191 @@ +use common::CompositeFile; +use fastfield::BytesFastFieldReader; +use fastfield::MultiValueIntFastFieldReader; +use fastfield::{FastFieldNotAvailableError, FastFieldReader}; +use schema::{Cardinality, Field, FieldType, Schema}; +use space_usage::PerFieldSpaceUsage; +use std::collections::HashMap; +use Result; + +/// Provides access to all of the FastFieldReader. +/// +/// Internally, `FastFieldReaders` have preloaded fast field readers, +/// and just wraps several `HashMap`. +pub struct FastFieldReaders { + fast_field_i64: HashMap>, + fast_field_u64: HashMap>, + fast_field_i64s: HashMap>, + fast_field_u64s: HashMap>, + fast_bytes: HashMap, + fast_fields_composite: CompositeFile, +} + +enum FastType { + I64, + U64, +} + +fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> { + match field_type { + FieldType::U64(options) => options + .get_fastfield_cardinality() + .map(|cardinality| (FastType::U64, cardinality)), + FieldType::I64(options) => options + .get_fastfield_cardinality() + .map(|cardinality| (FastType::I64, cardinality)), + FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)), + _ => None, + } +} + +impl FastFieldReaders { + pub(crate) fn load_all( + schema: &Schema, + fast_fields_composite: &CompositeFile, + ) -> Result { + let mut fast_field_readers = FastFieldReaders { + fast_field_i64: Default::default(), + fast_field_u64: Default::default(), + fast_field_i64s: Default::default(), + fast_field_u64s: Default::default(), + fast_bytes: Default::default(), + fast_fields_composite: fast_fields_composite.clone(), + }; + for (field_id, field_entry) in schema.fields().iter().enumerate() { + let field = Field(field_id as u32); + let field_type = field_entry.field_type(); + if field_type == &FieldType::Bytes { + let idx_reader = fast_fields_composite + .open_read_with_idx(field, 0) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry)) + .map(FastFieldReader::open)?; + let data = fast_fields_composite + .open_read_with_idx(field, 1) + .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?; + fast_field_readers + .fast_bytes + .insert(field, BytesFastFieldReader::open(idx_reader, data)); + } else if let Some((fast_type, cardinality)) = type_and_cardinality(field_type) { + match cardinality { + Cardinality::SingleValue => { + if let Some(fast_field_data) = fast_fields_composite.open_read(field) { + match fast_type { + FastType::U64 => { + let fast_field_reader = FastFieldReader::open(fast_field_data); + fast_field_readers + .fast_field_u64 + .insert(field, fast_field_reader); + } + FastType::I64 => { + fast_field_readers.fast_field_i64.insert( + field, + FastFieldReader::open(fast_field_data.clone()), + ); + } + } + } else { + return Err(From::from(FastFieldNotAvailableError::new(field_entry))); + } + } + Cardinality::MultiValues => { + let idx_opt = fast_fields_composite.open_read_with_idx(field, 0); + let data_opt = fast_fields_composite.open_read_with_idx(field, 1); + if let (Some(fast_field_idx), Some(fast_field_data)) = (idx_opt, data_opt) { + let idx_reader = FastFieldReader::open(fast_field_idx); + match fast_type { + FastType::I64 => { + let vals_reader = FastFieldReader::open(fast_field_data); + let multivalued_int_fast_field = + MultiValueIntFastFieldReader::open(idx_reader, vals_reader); + fast_field_readers + .fast_field_i64s + 
.insert(field, multivalued_int_fast_field); + } + FastType::U64 => { + let vals_reader = FastFieldReader::open(fast_field_data); + let multivalued_int_fast_field = + MultiValueIntFastFieldReader::open(idx_reader, vals_reader); + fast_field_readers + .fast_field_u64s + .insert(field, multivalued_int_fast_field); + } + } + } else { + return Err(From::from(FastFieldNotAvailableError::new(field_entry))); + } + } + } + } + } + Ok(fast_field_readers) + } + + pub(crate) fn space_usage(&self) -> PerFieldSpaceUsage { + self.fast_fields_composite.space_usage() + } + + /// Returns the `u64` fast field reader associated to `field`. + /// + /// If `field` is not a u64 fast field, this method returns `None`. + pub fn u64(&self, field: Field) -> Option<FastFieldReader<u64>> { + self.fast_field_u64.get(&field).cloned() + } + + /// If the field is a u64 fast field, returns the associated reader. + /// If the field is an i64 fast field, returns the associated u64 reader. Values are + /// mapped from i64 to u64 using the (unique) monotonic mapping. + /// + /// This method is useful when merging segment readers. + pub(crate) fn u64_lenient(&self, field: Field) -> Option<FastFieldReader<u64>> { + if let Some(u64_ff_reader) = self.u64(field) { + return Some(u64_ff_reader); + } + if let Some(i64_ff_reader) = self.i64(field) { + return Some(i64_ff_reader.into_u64_reader()); + } + None + } + + /// Returns the `i64` fast field reader associated to `field`. + /// + /// If `field` is not an i64 fast field, this method returns `None`. + pub fn i64(&self, field: Field) -> Option<FastFieldReader<i64>> { + self.fast_field_i64.get(&field).cloned() + } + + /// Returns a `u64s` multi-valued fast field reader associated to `field`. + /// + /// If `field` is not a u64 multi-valued fast field, this method returns `None`. + pub fn u64s(&self, field: Field) -> Option<MultiValueIntFastFieldReader<u64>> { + self.fast_field_u64s.get(&field).cloned() + } + + /// If the field is a u64s fast field, returns the associated reader. + /// If the field is an i64s fast field, returns the associated u64s reader. Values are + /// mapped from i64 to u64 using the (unique) monotonic mapping. + /// + /// This method is useful when merging segment readers. + pub(crate) fn u64s_lenient(&self, field: Field) -> Option<MultiValueIntFastFieldReader<u64>> { + if let Some(u64s_ff_reader) = self.u64s(field) { + return Some(u64s_ff_reader); + } + if let Some(i64s_ff_reader) = self.i64s(field) { + return Some(i64s_ff_reader.into_u64s_reader()); + } + None + } + + /// Returns an `i64s` multi-valued fast field reader associated to `field`. + /// + /// If `field` is not an i64 multi-valued fast field, this method returns `None`. + pub fn i64s(&self, field: Field) -> Option<MultiValueIntFastFieldReader<i64>> { + self.fast_field_i64s.get(&field).cloned() + } + + /// Returns the `bytes` fast field reader associated to `field`. + /// + /// If `field` is not a bytes fast field, returns `None`. 
+ pub fn bytes(&self, field: Field) -> Option { + self.fast_bytes.get(&field).cloned() + } +} diff --git a/src/indexer/log_merge_policy.rs b/src/indexer/log_merge_policy.rs index 45ef9cd..4c0731c 100644 --- a/src/indexer/log_merge_policy.rs +++ b/src/indexer/log_merge_policy.rs @@ -52,7 +52,7 @@ impl MergePolicy for LogMergePolicy { let mut size_sorted_tuples = segments .iter() - .map(|x| x.num_docs()) + .map(SegmentMeta::num_docs) .enumerate() .collect::>(); diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 404fd9b..ab3e221 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -3,6 +3,7 @@ use core::Segment; use core::SegmentReader; use core::SerializableSegment; use docset::DocSet; +use fastfield::BytesFastFieldReader; use fastfield::DeleteBitSet; use fastfield::FastFieldReader; use fastfield::FastFieldSerializer; @@ -72,7 +73,7 @@ fn compute_min_max_val( // some deleted documents, // we need to recompute the max / min (0..max_doc) - .filter(|doc_id| !delete_bitset.is_deleted(*doc_id)) + .filter(|doc_id| delete_bitset.is_alive(*doc_id)) .map(|doc_id| u64_reader.get(doc_id)) .minmax() .into_option() @@ -239,7 +240,10 @@ impl IndexMerger { let mut max_value = u64::min_value(); for reader in &self.readers { - let u64_reader: FastFieldReader = reader.fast_field_reader(field)?; + let u64_reader: FastFieldReader = reader + .fast_fields() + .u64_lenient(field) + .expect("Failed to find a reader for single fast field. This is a tantivy bug and it should never happen."); if let Some((seg_min_val, seg_max_val)) = compute_min_max_val(&u64_reader, reader.max_doc(), reader.delete_bitset()) { @@ -282,24 +286,28 @@ impl IndexMerger { fast_field_serializer: &mut FastFieldSerializer, ) -> Result<()> { let mut total_num_vals = 0u64; + let mut u64s_readers: Vec> = Vec::new(); // In the first pass, we compute the total number of vals. // // This is required by the bitpacker, as it needs to know // what should be the bit length use for bitpacking. for reader in &self.readers { - let idx_reader = reader.fast_field_reader_with_idx::(field, 0)?; + let u64s_reader = reader.fast_fields() + .u64s_lenient(field) + .expect("Failed to find index for multivalued field. 
This is a bug in tantivy, please report."); + if let Some(delete_bitset) = reader.delete_bitset() { for doc in 0u32..reader.max_doc() { - if !delete_bitset.is_deleted(doc) { - let start = idx_reader.get(doc); - let end = idx_reader.get(doc + 1); - total_num_vals += end - start; + if delete_bitset.is_alive(doc) { + let num_vals = u64s_reader.num_vals(doc) as u64; + total_num_vals += num_vals; } } } else { - total_num_vals += idx_reader.max_value(); + total_num_vals += u64s_reader.total_num_vals(); } + u64s_readers.push(u64s_reader); } // We can now create our `idx` serializer, and in a second pass, @@ -307,13 +315,10 @@ impl IndexMerger { let mut serialize_idx = fast_field_serializer.new_u64_fast_field_with_idx(field, 0, total_num_vals, 0)?; let mut idx = 0; - for reader in &self.readers { - let idx_reader = reader.fast_field_reader_with_idx::(field, 0)?; - for doc in reader.doc_ids_alive() { + for (segment_reader, u64s_reader) in self.readers.iter().zip(&u64s_readers) { + for doc in segment_reader.doc_ids_alive() { serialize_idx.add_val(idx)?; - let start = idx_reader.get(doc); - let end = idx_reader.get(doc + 1); - idx += end - start; + idx += u64s_reader.num_vals(doc) as u64; } } serialize_idx.add_val(idx)?; @@ -344,8 +349,10 @@ impl IndexMerger { for (segment_ord, segment_reader) in self.readers.iter().enumerate() { let term_ordinal_mapping: &[TermOrdinal] = term_ordinal_mappings.get_segment(segment_ord); - let ff_reader: MultiValueIntFastFieldReader = - segment_reader.multi_fast_field_reader(field)?; + let ff_reader: MultiValueIntFastFieldReader = segment_reader + .fast_fields() + .u64s(field) + .expect("Could not find multivalued u64 fast value reader."); // TODO optimize if no deletes for doc in segment_reader.doc_ids_alive() { ff_reader.get_vals(doc, &mut vals); @@ -377,6 +384,8 @@ impl IndexMerger { let mut vals = Vec::with_capacity(100); + let mut ff_readers = Vec::new(); + // Our values are bitpacked and we need to know what should be // our bitwidth and our minimum value before serializing any values. // @@ -385,7 +394,10 @@ impl IndexMerger { // maximum value and initialize our Serializer. for reader in &self.readers { let ff_reader: MultiValueIntFastFieldReader = - reader.multi_fast_field_reader(field)?; + reader.fast_fields().u64s_lenient(field).expect( + "Failed to find multivalued fast field reader. This is a bug in \ + tantivy. 
Please report.", + ); for doc in reader.doc_ids_alive() { ff_reader.get_vals(doc, &mut vals); for &val in &vals { @@ -393,6 +405,7 @@ impl IndexMerger { max_value = cmp::max(val, max_value); } } + ff_readers.push(ff_reader); // TODO optimize when no deletes } @@ -405,9 +418,7 @@ impl IndexMerger { { let mut serialize_vals = fast_field_serializer .new_u64_fast_field_with_idx(field, min_value, max_value, 1)?; - for reader in &self.readers { - let ff_reader: MultiValueIntFastFieldReader = - reader.multi_fast_field_reader(field)?; + for (reader, ff_reader) in self.readers.iter().zip(ff_readers) { // TODO optimize if no deletes for doc in reader.doc_ids_alive() { ff_reader.get_vals(doc, &mut vals); @@ -426,19 +437,53 @@ impl IndexMerger { field: Field, fast_field_serializer: &mut FastFieldSerializer, ) -> Result<()> { - self.write_fast_field_idx(field, fast_field_serializer)?; + let mut total_num_vals = 0u64; + let mut bytes_readers: Vec = Vec::new(); - let mut serialize_vals = fast_field_serializer.new_bytes_fast_field_with_idx(field, 1)?; for reader in &self.readers { - let bytes_reader = reader.bytes_fast_field_reader(field)?; + let bytes_reader = reader.fast_fields().bytes(field).expect( + "Failed to find bytes fast field reader. This is a bug in tantivy, please report.", + ); + if let Some(delete_bitset) = reader.delete_bitset() { + for doc in 0u32..reader.max_doc() { + if delete_bitset.is_alive(doc) { + let num_vals = bytes_reader.get_bytes(doc).len() as u64; + total_num_vals += num_vals; + } + } + } else { + total_num_vals += bytes_reader.total_num_bytes() as u64; + } + bytes_readers.push(bytes_reader); + } + + { + // We can now create our `idx` serializer, and in a second pass, + // can effectively push the different indexes. + let mut serialize_idx = + fast_field_serializer.new_u64_fast_field_with_idx(field, 0, total_num_vals, 0)?; + let mut idx = 0; + for (segment_reader, bytes_reader) in self.readers.iter().zip(&bytes_readers) { + for doc in segment_reader.doc_ids_alive() { + serialize_idx.add_val(idx)?; + idx += bytes_reader.get_bytes(doc).len() as u64; + } + } + serialize_idx.add_val(idx)?; + serialize_idx.close_field()?; + } + + let mut serialize_vals = fast_field_serializer.new_bytes_fast_field_with_idx(field, 1)?; + for segment_reader in &self.readers { + let bytes_reader = segment_reader.fast_fields().bytes(field) + .expect("Failed to find bytes field in fast field reader. This is a bug in tantivy. 
Please report."); // TODO: optimize if no deletes - for doc in reader.doc_ids_alive() { - let val = bytes_reader.get_val(doc); + for doc in segment_reader.doc_ids_alive() { + let val = bytes_reader.get_bytes(doc); serialize_vals.write_all(val)?; } } serialize_vals.flush()?; - Ok(()) } @@ -979,14 +1024,16 @@ mod tests { let score_field_reader = searcher .segment_reader(0) - .fast_field_reader::(score_field) + .fast_fields() + .u64(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 4000); assert_eq!(score_field_reader.max_value(), 7000); let score_field_reader = searcher .segment_reader(1) - .fast_field_reader::(score_field) + .fast_fields() + .u64(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 1); assert_eq!(score_field_reader.max_value(), 3); @@ -1037,7 +1084,8 @@ mod tests { ); let score_field_reader = searcher .segment_reader(0) - .fast_field_reader::(score_field) + .fast_fields() + .u64(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 3); assert_eq!(score_field_reader.max_value(), 7000); @@ -1083,7 +1131,8 @@ mod tests { ); let score_field_reader = searcher .segment_reader(0) - .fast_field_reader::(score_field) + .fast_fields() + .u64(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 3); assert_eq!(score_field_reader.max_value(), 7000); @@ -1135,7 +1184,8 @@ mod tests { ); let score_field_reader = searcher .segment_reader(0) - .fast_field_reader::(score_field) + .fast_fields() + .u64(score_field) .unwrap(); assert_eq!(score_field_reader.min_value(), 6000); assert_eq!(score_field_reader.max_value(), 7000); @@ -1381,7 +1431,7 @@ mod tests { { let segment = searcher.segment_reader(0u32); - let ff_reader = segment.multi_fast_field_reader(int_field).unwrap(); + let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); ff_reader.get_vals(0, &mut vals); assert_eq!(&vals, &[1, 2]); @@ -1416,7 +1466,7 @@ mod tests { { let segment = searcher.segment_reader(1u32); - let ff_reader = segment.multi_fast_field_reader(int_field).unwrap(); + let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); ff_reader.get_vals(0, &mut vals); assert_eq!(&vals, &[28, 27]); @@ -1426,7 +1476,7 @@ mod tests { { let segment = searcher.segment_reader(2u32); - let ff_reader = segment.multi_fast_field_reader(int_field).unwrap(); + let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); ff_reader.get_vals(0, &mut vals); assert_eq!(&vals, &[20]); } @@ -1459,7 +1509,7 @@ mod tests { .collect::>() ); let segment = searcher.segment_reader(0u32); - let ff_reader = segment.multi_fast_field_reader(int_field).unwrap(); + let ff_reader = segment.fast_fields().u64s(int_field).unwrap(); ff_reader.get_vals(0, &mut vals); assert_eq!(&vals, &[1, 2]); diff --git a/src/indexer/segment_register.rs b/src/indexer/segment_register.rs index 74234f2..6d4dc7e 100644 --- a/src/indexer/segment_register.rs +++ b/src/indexer/segment_register.rs @@ -56,7 +56,7 @@ impl SegmentRegister { .values() .map(|segment_entry| segment_entry.meta().clone()) .collect(); - segment_ids.sort_by_key(|meta| meta.id()); + segment_ids.sort_by_key(SegmentMeta::id); segment_ids } diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 4eb42c8..41fab5a 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -5,6 +5,7 @@ use fastfield::FastFieldsWriter; use fieldnorm::FieldNormsWriter; use indexer::segment_serializer::SegmentSerializer; use postings::MultiFieldPostingsWriter; +use schema::FieldEntry; use schema::FieldType; use 
schema::Schema; use schema::Term; @@ -53,7 +54,7 @@ impl SegmentWriter { schema .fields() .iter() - .map(|field_entry| field_entry.field_type()) + .map(FieldEntry::field_type) .map(|field_type| match *field_type { FieldType::Str(ref text_options) => text_options .get_indexing_options() diff --git a/src/lib.rs b/src/lib.rs index 3a4a676..4f0e50a 100755 --- a/src/lib.rs +++ b/src/lib.rs @@ -876,28 +876,28 @@ mod tests { let searcher = reader.searcher(); let segment_reader: &SegmentReader = searcher.segment_reader(0); { - let fast_field_reader_res = segment_reader.fast_field_reader::(text_field); - assert!(fast_field_reader_res.is_err()); + let fast_field_reader_opt = segment_reader.fast_fields().u64(text_field); + assert!(fast_field_reader_opt.is_none()); } { - let fast_field_reader_res = segment_reader.fast_field_reader::(stored_int_field); - assert!(fast_field_reader_res.is_err()); + let fast_field_reader_opt = segment_reader.fast_fields().u64(stored_int_field); + assert!(fast_field_reader_opt.is_none()); } { - let fast_field_reader_res = segment_reader.fast_field_reader::(fast_field_signed); - assert!(fast_field_reader_res.is_err()); + let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_signed); + assert!(fast_field_reader_opt.is_none()); } { - let fast_field_reader_res = segment_reader.fast_field_reader::(fast_field_signed); - assert!(fast_field_reader_res.is_ok()); - let fast_field_reader = fast_field_reader_res.unwrap(); + let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed); + assert!(fast_field_reader_opt.is_some()); + let fast_field_reader = fast_field_reader_opt.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) } { - let fast_field_reader_res = segment_reader.fast_field_reader::(fast_field_signed); - assert!(fast_field_reader_res.is_ok()); - let fast_field_reader = fast_field_reader_res.unwrap(); + let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed); + assert!(fast_field_reader_opt.is_some()); + let fast_field_reader = fast_field_reader_opt.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) } } diff --git a/src/postings/serializer.rs b/src/postings/serializer.rs index 4169059..846780e 100644 --- a/src/postings/serializer.rs +++ b/src/postings/serializer.rs @@ -175,7 +175,7 @@ impl<'a> FieldSerializer<'a> { let positions_idx = self .positions_serializer_opt .as_ref() - .map(|positions_serializer| positions_serializer.positions_idx()) + .map(PositionSerializer::positions_idx) .unwrap_or(0u64); TermInfo { doc_freq: 0, diff --git a/src/query/phrase_query/phrase_query.rs b/src/query/phrase_query/phrase_query.rs index 959b17b..90f2d22 100644 --- a/src/query/phrase_query/phrase_query.rs +++ b/src/query/phrase_query/phrase_query.rs @@ -4,6 +4,7 @@ use error::TantivyError; use query::bm25::BM25Weight; use query::Query; use query::Weight; +use schema::IndexRecordOption; use schema::{Field, Term}; use std::collections::BTreeSet; use Result; @@ -83,7 +84,7 @@ impl Query for PhraseQuery { let has_positions = field_entry .field_type() .get_index_record_option() - .map(|index_record_option| index_record_option.has_positions()) + .map(IndexRecordOption::has_positions) .unwrap_or(false); if !has_positions { let field_name = field_entry.name(); diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 2b6fda5..a06845b 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -1,6 +1,7 @@ #![cfg_attr(feature = "cargo-clippy", 
allow(clippy::unneeded_field_pattern))] #![cfg_attr(feature = "cargo-clippy", allow(clippy::toplevel_ref_arg))] +use super::query_grammar; use super::user_input_ast::*; use combine::char::*; use combine::error::StreamError; @@ -22,7 +23,7 @@ parser! { parser! { fn word[I]()(I) -> String where [I: Stream] { - many1(satisfy(|c: char| c.is_alphanumeric())) + many1(satisfy(char::is_alphanumeric)) .and_then(|s: String| { match s.as_str() { "OR" => Err(StreamErrorFor::::unexpected_static_message("OR")), @@ -62,7 +63,7 @@ parser! { fn negative_number[I]()(I) -> String where [I: Stream] { - (char('-'), many1(satisfy(|c: char| c.is_numeric()))) + (char('-'), many1(satisfy(char::is_numeric))) .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)) } } @@ -184,7 +185,7 @@ parser! { } ) ) - .map(|el| el.into_dnf()) + .map(query_grammar::Element::into_dnf) .map(|fnd| { if fnd.len() == 1 { UserInputAST::and(fnd.into_iter().next().unwrap()) //< safe diff --git a/src/query/union.rs b/src/query/union.rs index 71499fa..f636f05 100644 --- a/src/query/union.rs +++ b/src/query/union.rs @@ -96,7 +96,7 @@ fn refill( impl Union { fn refill(&mut self) -> bool { - if let Some(min_doc) = self.docsets.iter_mut().map(|docset| docset.doc()).min() { + if let Some(min_doc) = self.docsets.iter().map(DocSet::doc).min() { self.offset = min_doc; self.cursor = 0; refill( diff --git a/src/schema/document.rs b/src/schema/document.rs index bec5417..687baef 100644 --- a/src/schema/document.rs +++ b/src/schema/document.rs @@ -128,7 +128,7 @@ impl Document { self.field_values .iter() .filter(|field_value| field_value.field() == field) - .map(|field_value| field_value.value()) + .map(FieldValue::value) .collect() } @@ -137,7 +137,7 @@ impl Document { self.field_values .iter() .find(|field_value| field_value.field() == field) - .map(|field_value| field_value.value()) + .map(FieldValue::value) } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index a67451d..561ba3f 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -4,6 +4,7 @@ use schema::{IntOptions, TextOptions}; use schema::Facet; use schema::IndexRecordOption; +use schema::TextFieldIndexing; use schema::Value; use serde_json::Value as JsonValue; @@ -94,7 +95,7 @@ impl FieldType { match *self { FieldType::Str(ref text_options) => text_options .get_indexing_options() - .map(|indexing_options| indexing_options.index_option()), + .map(TextFieldIndexing::index_option), FieldType::U64(ref int_options) | FieldType::I64(ref int_options) | FieldType::Date(ref int_options) => { diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 7afa8e6..bacfd74 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -233,7 +233,7 @@ impl Schema { let field_name = self.get_field_name(field); let values: Vec = field_values .into_iter() - .map(|field_val| field_val.value()) + .map(FieldValue::value) .cloned() .collect(); field_map.insert(field_name.to_string(), values); diff --git a/src/snippet/mod.rs b/src/snippet/mod.rs index 3fa605b..3a8e46a 100644 --- a/src/snippet/mod.rs +++ b/src/snippet/mod.rs @@ -1,6 +1,7 @@ use htmlescape::encode_minimal; use query::Query; use schema::Field; +use schema::Value; use std::cmp::Ordering; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -303,7 +304,7 @@ impl SnippetGenerator { let text: String = doc .get_all(self.field) .into_iter() - .flat_map(|val| val.text()) + .flat_map(Value::text) .collect::>() .join(" "); self.snippet(&text) diff --git a/src/space_usage/mod.rs b/src/space_usage/mod.rs 
index 8ffb841..cccca50 100644 --- a/src/space_usage/mod.rs +++ b/src/space_usage/mod.rs @@ -227,7 +227,7 @@ pub struct PerFieldSpaceUsage { impl PerFieldSpaceUsage { pub(crate) fn new(fields: HashMap) -> PerFieldSpaceUsage { - let total = fields.values().map(|x| x.total()).sum(); + let total = fields.values().map(FieldUsage::total).sum(); PerFieldSpaceUsage { fields, total } } diff --git a/src/tokenizer/tokenizer_manager.rs b/src/tokenizer/tokenizer_manager.rs index 7f97c58..4c72428 100644 --- a/src/tokenizer/tokenizer_manager.rs +++ b/src/tokenizer/tokenizer_manager.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::ops::Deref; use std::sync::{Arc, RwLock}; use tokenizer::box_tokenizer; use tokenizer::stemmer::Language; @@ -46,7 +47,8 @@ impl TokenizerManager { .read() .expect("Acquiring the lock should never fail") .get(tokenizer_name) - .map(|boxed_tokenizer| boxed_tokenizer.boxed_clone()) + .map(Deref::deref) + .map(BoxedTokenizer::boxed_clone) } } -- cgit v1.2.3
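
Usage sketch for the API introduced by this patch: fast field readers are preloaded when the SegmentReader is built and exposed through `SegmentReader::fast_fields()`, whose accessors return `Option` instead of `Result`. The reader calls (`fast_fields().u64(..)`, `get`, `min_value`, `max_value`, `writer_with_num_threads`, `index.reader()`) are taken from the patch's own tests; the schema setup (`SchemaBuilder::default()`, `add_u64_field("score", FAST)`) is an assumption about the tantivy API of this era rather than something shown in the diff.

#[macro_use]
extern crate tantivy;

use tantivy::schema::{SchemaBuilder, FAST};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // Assumed schema setup: a single-valued u64 fast field named "score".
    let mut schema_builder = SchemaBuilder::default();
    let score = schema_builder.add_u64_field("score", FAST);
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
    index_writer.add_document(doc!(score => 3u64));
    index_writer.add_document(doc!(score => 7u64));
    index_writer.commit()?;

    let searcher = index.reader()?.searcher();
    let segment_reader = searcher.segment_reader(0);

    // New in this patch: fast field readers are loaded when the SegmentReader is
    // created, and the accessor returns an Option (None on a schema mismatch).
    let score_reader = segment_reader
        .fast_fields()
        .u64(score)
        .expect("'score' is declared as a u64 fast field");
    assert_eq!(score_reader.get(0), 3u64);
    assert_eq!(score_reader.min_value(), 3u64);
    assert_eq!(score_reader.max_value(), 7u64);
    Ok(())
}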
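
A second sketch, under the same caveats, for the multi-valued and bytes accessors (`fast_fields().u64s(..)`, `fast_fields().bytes(..)`, and `get_bytes`, which this patch renames from `get_val`). The accessor calls mirror the tests in src/fastfield/bytes/mod.rs and src/fastfield/multivalued/mod.rs; the field declarations (`set_fast(Cardinality::MultiValues)`, `add_bytes_field`) are assumed, not part of the diff.

#[macro_use]
extern crate tantivy;

use tantivy::schema::{Cardinality, IntOptions, SchemaBuilder};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    let mut schema_builder = SchemaBuilder::default();
    // Assumed field declarations: a multi-valued u64 fast field and a bytes fast field.
    let ids_options = IntOptions::default().set_fast(Cardinality::MultiValues);
    let ids = schema_builder.add_u64_field("ids", ids_options);
    let payload = schema_builder.add_bytes_field("payload");
    let schema = schema_builder.build();

    let index = Index::create_in_ram(schema);
    let mut index_writer = index.writer_with_num_threads(1, 30_000_000)?;
    index_writer.add_document(doc!(
        ids => 1u64,
        ids => 2u64,
        payload => vec![0u8, 1, 2, 3]
    ));
    index_writer.commit()?;

    let searcher = index.reader()?.searcher();
    let segment_reader = searcher.segment_reader(0);

    // Multi-valued access: `u64s` returns an Option; `get_vals` fills a caller-provided Vec.
    let ids_reader = segment_reader
        .fast_fields()
        .u64s(ids)
        .expect("'ids' is a multi-valued u64 fast field");
    let mut vals = Vec::new();
    ids_reader.get_vals(0, &mut vals);
    assert_eq!(&vals, &[1u64, 2u64]);
    assert_eq!(ids_reader.num_vals(0), 2);

    // Bytes access: this patch renames `get_val` to `get_bytes`.
    let payload_reader = segment_reader
        .fast_fields()
        .bytes(payload)
        .expect("'payload' is a bytes fast field");
    assert_eq!(payload_reader.get_bytes(0), &[0u8, 1, 2, 3]);
    Ok(())
}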