summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Paul Masurel <paul.masurel@gmail.com> 2019-05-05 13:52:43 +0900
committer GitHub <noreply@github.com> 2019-05-05 13:52:43 +0900
commit 66b4615e4e6ab66dfb67fb3d4fcbd68fb3051b5b (patch)
tree 91e1e547e55cc1a7efcacbc4d5c6edcd65cd32a9
parent 3df037961f07ec2e3232164e186d085cebe13067 (diff)
Issue/542 (#543)
* Closes 542. Fast fields are all loaded when the segment reader is created.
-rw-r--r--CHANGELOG.md19
-rw-r--r--examples/custom_collector.rs12
-rw-r--r--src/collector/facet_collector.rs6
-rw-r--r--src/collector/multi_collector.rs6
-rw-r--r--src/collector/tests.rs17
-rw-r--r--src/collector/top_collector.rs4
-rw-r--r--src/collector/top_field_collector.rs28
-rw-r--r--src/core/index.rs2
-rw-r--r--src/core/searcher.rs4
-rw-r--r--src/core/segment_reader.rs105
-rw-r--r--src/directory/ram_directory.rs2
-rw-r--r--src/fastfield/bytes/mod.rs14
-rw-r--r--src/fastfield/bytes/reader.rs15
-rw-r--r--src/fastfield/delete.rs20
-rw-r--r--src/fastfield/mod.rs10
-rw-r--r--src/fastfield/multivalued/mod.rs8
-rw-r--r--src/fastfield/multivalued/reader.rs42
-rw-r--r--src/fastfield/reader.rs9
-rw-r--r--src/fastfield/readers.rs191
-rw-r--r--src/indexer/log_merge_policy.rs2
-rw-r--r--src/indexer/merger.rs120
-rw-r--r--src/indexer/segment_register.rs2
-rw-r--r--src/indexer/segment_writer.rs3
-rwxr-xr-xsrc/lib.rs24
-rw-r--r--src/postings/serializer.rs2
-rw-r--r--src/query/phrase_query/phrase_query.rs3
-rw-r--r--src/query/query_parser/query_grammar.rs7
-rw-r--r--src/query/union.rs2
-rw-r--r--src/schema/document.rs4
-rw-r--r--src/schema/field_type.rs3
-rw-r--r--src/schema/schema.rs2
-rw-r--r--src/snippet/mod.rs3
-rw-r--r--src/space_usage/mod.rs2
-rw-r--r--src/tokenizer/tokenizer_manager.rs4
34 files changed, 478 insertions, 219 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7957d99..7dd3822 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,8 @@
Tantivy 0.10.0
=====================
+
+*Tantivy 0.10.0 index format is compatible with the index format in 0.9.0.*
+
- Added an ASCII folding filter (@drusellers)
- Bugfix in `query.count` in presence of deletes (@pmasurel)
@@ -9,6 +12,22 @@ Minor
Calling .freq() or .doc() when .advance() has never been called
on segment postings should panic from now on.
- Tokens exceeding `u16::max_value() - 4` chars are discarded silently instead of panicking.
+- Fast fields are now preloaded when the `SegmentReader` is created.
+
+## How to update?
+
+Your existing indexes are usable as is. You may or may not need some
+trivial updates.
+
+### Fast fields
+
+Fast fields used to be accessed directly from the `SegmentReader`.
+The API changed: you are now required to acquire your fast field reader via
+`segment_reader.fast_fields()`, and use one of the typed methods:
+- `.u64()`, `.i64()` if your field is single-valued;
+- `.u64s()`, `.i64s()` if your field is multi-valued;
+- `.bytes()` if your field is a bytes fast field.
+
Tantivy 0.9.0
diff --git a/examples/custom_collector.rs b/examples/custom_collector.rs
index 40a1d1a..444534c 100644
--- a/examples/custom_collector.rs
+++ b/examples/custom_collector.rs
@@ -18,7 +18,7 @@ use tantivy::fastfield::FastFieldReader;
use tantivy::query::QueryParser;
use tantivy::schema::Field;
use tantivy::schema::{Schema, FAST, INDEXED, TEXT};
-use tantivy::Index;
+use tantivy::{Index, TantivyError};
use tantivy::SegmentReader;
#[derive(Default)]
@@ -75,9 +75,15 @@ impl Collector for StatsCollector {
fn for_segment(
&self,
_segment_local_id: u32,
- segment: &SegmentReader,
+ segment_reader: &SegmentReader,
) -> tantivy::Result<StatsSegmentCollector> {
- let fast_field_reader = segment.fast_field_reader(self.field)?;
+ let fast_field_reader = segment_reader
+ .fast_fields()
+ .u64(self.field)
+ .ok_or_else(|| {
+ let field_name = segment_reader.schema().get_field_name()
+ TantivyError::SchemaError(format!("Field {:?} is not a u64 fast field.", field_name))
+ })?;
Ok(StatsSegmentCollector {
fast_field_reader,
stats: Stats::default(),
diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs
index 16ce942..f86cc94 100644
--- a/src/collector/facet_collector.rs
+++ b/src/collector/facet_collector.rs
@@ -17,6 +17,7 @@ use Result;
use Score;
use SegmentLocalId;
use SegmentReader;
+use TantivyError;
struct Hit<'a> {
count: u64,
@@ -264,7 +265,10 @@ impl Collector for FacetCollector {
_: SegmentLocalId,
reader: &SegmentReader,
) -> Result<FacetSegmentCollector> {
- let facet_reader = reader.facet_reader(self.field)?;
+ let field_name = reader.schema().get_field_name(self.field);
+ let facet_reader = reader.facet_reader(self.field).ok_or_else(|| {
+ TantivyError::SchemaError(format!("Field {:?} is not a facet field.", field_name))
+ })?;
let mut collapse_mapping = Vec::new();
let mut counts = Vec::new();
diff --git a/src/collector/multi_collector.rs b/src/collector/multi_collector.rs
index cc3bfc4..43d5dde 100644
--- a/src/collector/multi_collector.rs
+++ b/src/collector/multi_collector.rs
@@ -2,6 +2,7 @@ use super::Collector;
use super::SegmentCollector;
use collector::Fruit;
use std::marker::PhantomData;
+use std::ops::Deref;
use DocId;
use Result;
use Score;
@@ -199,7 +200,10 @@ impl<'a> Collector for MultiCollector<'a> {
}
fn requires_scoring(&self) -> bool {
- self.collector_wrappers.iter().any(|c| c.requires_scoring())
+ self.collector_wrappers
+ .iter()
+ .map(Deref::deref)
+ .any(Collector::requires_scoring)
}
fn merge_fruits(&self, segments_multifruits: Vec<MultiFruit>) -> Result<MultiFruit> {
diff --git a/src/collector/tests.rs b/src/collector/tests.rs
index cc8bcff..424ceb3 100644
--- a/src/collector/tests.rs
+++ b/src/collector/tests.rs
@@ -114,11 +114,15 @@ impl Collector for FastFieldTestCollector {
fn for_segment(
&self,
_: SegmentLocalId,
- reader: &SegmentReader,
+ segment_reader: &SegmentReader,
) -> Result<FastFieldSegmentCollector> {
+ let reader = segment_reader
+ .fast_fields()
+ .u64(self.field)
+ .expect("Requested field is not a fast field.");
Ok(FastFieldSegmentCollector {
vals: Vec::new(),
- reader: reader.fast_field_reader(self.field)?,
+ reader,
})
}
@@ -170,11 +174,14 @@ impl Collector for BytesFastFieldTestCollector {
fn for_segment(
&self,
_segment_local_id: u32,
- segment: &SegmentReader,
+ segment_reader: &SegmentReader,
) -> Result<BytesFastFieldSegmentCollector> {
Ok(BytesFastFieldSegmentCollector {
vals: Vec::new(),
- reader: segment.bytes_fast_field_reader(self.field)?,
+ reader: segment_reader
+ .fast_fields()
+ .bytes(self.field)
+ .expect("Field is not a bytes fast field."),
})
}
@@ -191,7 +198,7 @@ impl SegmentCollector for BytesFastFieldSegmentCollector {
type Fruit = Vec<u8>;
fn collect(&mut self, doc: u32, _score: f32) {
- let data = self.reader.get_val(doc);
+ let data = self.reader.get_bytes(doc);
self.vals.extend(data);
}
diff --git a/src/collector/top_collector.rs b/src/collector/top_collector.rs
index b17a7d6..880df69 100644
--- a/src/collector/top_collector.rs
+++ b/src/collector/top_collector.rs
@@ -98,11 +98,11 @@ where
.collect())
}
- pub(crate) fn for_segment(
+ pub(crate) fn for_segment<F: PartialOrd>(
&self,
segment_id: SegmentLocalId,
_: &SegmentReader,
- ) -> Result<TopSegmentCollector<T>> {
+ ) -> Result<TopSegmentCollector<F>> {
Ok(TopSegmentCollector::new(segment_id, self.limit))
}
}
diff --git a/src/collector/top_field_collector.rs b/src/collector/top_field_collector.rs
index b1a2d5e..b7cf7c9 100644
--- a/src/collector/top_field_collector.rs
+++ b/src/collector/top_field_collector.rs
@@ -5,10 +5,12 @@ use collector::SegmentCollector;
use fastfield::FastFieldReader;
use fastfield::FastValue;
use schema::Field;
+use std::marker::PhantomData;
use DocAddress;
use Result;
use SegmentLocalId;
use SegmentReader;
+use TantivyError;
/// The Top Field Collector keeps track of the K documents
/// sorted by a fast field in the index
@@ -106,8 +108,15 @@ impl<T: FastValue + PartialOrd + Send + Sync + 'static> Collector for TopDocsByF
reader: &SegmentReader,
) -> Result<TopFieldSegmentCollector<T>> {
let collector = self.collector.for_segment(segment_local_id, reader)?;
- let reader = reader.fast_field_reader(self.field)?;
- Ok(TopFieldSegmentCollector { collector, reader })
+ let reader = reader.fast_fields().u64(self.field).ok_or_else(|| {
+ let field_name = reader.schema().get_field_name(self.field);
+ TantivyError::SchemaError(format!("Failed to find fast field reader {:?}", field_name))
+ })?;
+ Ok(TopFieldSegmentCollector {
+ collector,
+ reader,
+ _type: PhantomData,
+ })
}
fn requires_scoring(&self) -> bool {
@@ -122,9 +131,10 @@ impl<T: FastValue + PartialOrd + Send + Sync + 'static> Collector for TopDocsByF
}
}
-pub struct TopFieldSegmentCollector<T: FastValue + PartialOrd> {
- collector: TopSegmentCollector<T>,
- reader: FastFieldReader<T>,
+pub struct TopFieldSegmentCollector<T> {
+ collector: TopSegmentCollector<u64>,
+ reader: FastFieldReader<u64>,
+ _type: PhantomData<T>,
}
impl<T: FastValue + PartialOrd + Send + Sync + 'static> SegmentCollector
@@ -138,7 +148,11 @@ impl<T: FastValue + PartialOrd + Send + Sync + 'static> SegmentCollector
}
fn harvest(self) -> Vec<(T, DocAddress)> {
- self.collector.harvest()
+ self.collector
+ .harvest()
+ .into_iter()
+ .map(|(val, doc_address)| (T::from_u64(val), doc_address))
+ .collect()
}
}
@@ -235,7 +249,7 @@ mod tests {
.for_segment(0, segment)
.map(|_| ())
.unwrap_err(),
- TantivyError::FastFieldError(_)
+ TantivyError::SchemaError(_)
);
}
diff --git a/src/core/index.rs b/src/core/index.rs
index 8e1709c..8b486e5 100644
--- a/src/core/index.rs
+++ b/src/core/index.rs
@@ -340,7 +340,7 @@ impl Index {
Ok(self
.searchable_segment_metas()?
.iter()
- .map(|segment_meta| segment_meta.id())
+ .map(SegmentMeta::id)
.collect())
}
}
diff --git a/src/core/searcher.rs b/src/core/searcher.rs
index 9e74fdd..3c0c745 100644
--- a/src/core/searcher.rs
+++ b/src/core/searcher.rs
@@ -59,7 +59,7 @@ impl Searcher {
) -> Searcher {
let store_readers = segment_readers
.iter()
- .map(|segment_reader| segment_reader.get_store_reader())
+ .map(SegmentReader::get_store_reader)
.collect();
Searcher {
schema,
@@ -218,7 +218,7 @@ impl fmt::Debug for Searcher {
let segment_ids = self
.segment_readers
.iter()
- .map(|segment_reader| segment_reader.segment_id())
+ .map(SegmentReader::segment_id)
.collect::<Vec<_>>();
write!(f, "Searcher({:?})", segment_ids)
}
diff --git a/src/core/segment_reader.rs b/src/core/segment_reader.rs
index a8ac2b0..2dbb5ad 100644
--- a/src/core/segment_reader.rs
+++ b/src/core/segment_reader.rs
@@ -5,14 +5,10 @@ use core::Segment;
use core::SegmentComponent;
use core::SegmentId;
use directory::ReadOnlySource;
-use error::TantivyError;
use fastfield::DeleteBitSet;
use fastfield::FacetReader;
-use fastfield::FastFieldReader;
-use fastfield::{self, FastFieldNotAvailableError};
-use fastfield::{BytesFastFieldReader, FastValue, MultiValueIntFastFieldReader};
+use fastfield::FastFieldReaders;
use fieldnorm::FieldNormReader;
-use schema::Cardinality;
use schema::Field;
use schema::FieldType;
use schema::Schema;
@@ -51,7 +47,7 @@ pub struct SegmentReader {
postings_composite: CompositeFile,
positions_composite: CompositeFile,
positions_idx_composite: CompositeFile,
- fast_fields_composite: CompositeFile,
+ fast_fields_readers: Arc<FastFieldReaders>,
fieldnorms_composite: CompositeFile,
store_source: ReadOnlySource,
@@ -105,93 +101,21 @@ impl SegmentReader {
///
/// # Panics
/// May panic if the index is corrupted.
- pub fn fast_field_reader<Item: FastValue>(
- &self,
- field: Field,
- ) -> fastfield::Result<FastFieldReader<Item>> {
- let field_entry = self.schema.get_field_entry(field);
- if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::SingleValue)
- {
- self.fast_fields_composite
- .open_read(field)
- .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
- .map(FastFieldReader::open)
- } else {
- Err(FastFieldNotAvailableError::new(field_entry))
- }
- }
-
- pub(crate) fn fast_field_reader_with_idx<Item: FastValue>(
- &self,
- field: Field,
- idx: usize,
- ) -> fastfield::Result<FastFieldReader<Item>> {
- if let Some(ff_source) = self.fast_fields_composite.open_read_with_idx(field, idx) {
- Ok(FastFieldReader::open(ff_source))
- } else {
- let field_entry = self.schema.get_field_entry(field);
- Err(FastFieldNotAvailableError::new(field_entry))
- }
- }
-
- /// Accessor to the `MultiValueIntFastFieldReader` associated to a given `Field`.
- /// May panick if the field is not a multivalued fastfield of the type `Item`.
- pub fn multi_fast_field_reader<Item: FastValue>(
- &self,
- field: Field,
- ) -> fastfield::Result<MultiValueIntFastFieldReader<Item>> {
- let field_entry = self.schema.get_field_entry(field);
- if Item::fast_field_cardinality(field_entry.field_type()) == Some(Cardinality::MultiValues)
- {
- let idx_reader = self.fast_field_reader_with_idx(field, 0)?;
- let vals_reader = self.fast_field_reader_with_idx(field, 1)?;
- Ok(MultiValueIntFastFieldReader::open(idx_reader, vals_reader))
- } else {
- Err(FastFieldNotAvailableError::new(field_entry))
- }
- }
-
- /// Accessor to the `BytesFastFieldReader` associated to a given `Field`.
- pub fn bytes_fast_field_reader(&self, field: Field) -> fastfield::Result<BytesFastFieldReader> {
- let field_entry = self.schema.get_field_entry(field);
- match *field_entry.field_type() {
- FieldType::Bytes => {}
- _ => return Err(FastFieldNotAvailableError::new(field_entry)),
- }
- let idx_reader = self
- .fast_fields_composite
- .open_read_with_idx(field, 0)
- .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))
- .map(FastFieldReader::open)?;
- let values = self
- .fast_fields_composite
- .open_read_with_idx(field, 1)
- .ok_or_else(|| FastFieldNotAvailableError::new(field_entry))?;
- Ok(BytesFastFieldReader::open(idx_reader, values))
+ pub fn fast_fields(&self) -> &FastFieldReaders {
+ &self.fast_fields_readers
}
/// Accessor to the `FacetReader` associated to a given `Field`.
- pub fn facet_reader(&self, field: Field) -> Result<FacetReader> {
+ pub fn facet_reader(&self, field: Field) -> Option<FacetReader> {
let field_entry = self.schema.get_field_entry(field);
if field_entry.field_type() != &FieldType::HierarchicalFacet {
- return Err(TantivyError::InvalidArgument(format!(
- "The field {:?} is not a \
- hierarchical facet.",
- field_entry
- )));
+ return None;
}
- let term_ords_reader = self.multi_fast_field_reader(field)?;
- let termdict_source = self.termdict_composite.open_read(field).ok_or_else(|| {
- TantivyError::InvalidArgument(format!(
- "The field \"{}\" is a hierarchical \
- but this segment does not seem to have the field term \
- dictionary.",
- field_entry.name()
- ))
- })?;
+ let term_ords_reader = self.fast_fields().u64s(field)?;
+ let termdict_source = self.termdict_composite.open_read(field)?;
let termdict = TermDictionary::from_source(&termdict_source);
let facet_reader = FacetReader::new(term_ords_reader, termdict);
- Ok(facet_reader)
+ Some(facet_reader)
}
/// Accessor to the segment's `Field norms`'s reader.
@@ -247,8 +171,12 @@ impl SegmentReader {
}
};
+ let schema = segment.schema();
+
let fast_fields_data = segment.open_read(SegmentComponent::FASTFIELDS)?;
let fast_fields_composite = CompositeFile::open(&fast_fields_data)?;
+ let fast_field_readers =
+ Arc::new(FastFieldReaders::load_all(&schema, &fast_fields_composite)?);
let fieldnorms_data = segment.open_read(SegmentComponent::FIELDNORMS)?;
let fieldnorms_composite = CompositeFile::open(&fieldnorms_data)?;
@@ -260,14 +188,13 @@ impl SegmentReader {
None
};
- let schema = segment.schema();
Ok(SegmentReader {
inv_idx_reader_cache: Arc::new(RwLock::new(HashMap::new())),
max_doc: segment.meta().max_doc(),
num_docs: segment.meta().num_docs(),
termdict_composite,
postings_composite,
- fast_fields_composite,
+ fast_fields_readers: fast_field_readers,
fieldnorms_composite,
segment_id: segment.id(),
store_source,
@@ -381,12 +308,12 @@ impl SegmentReader {
self.postings_composite.space_usage(),
self.positions_composite.space_usage(),
self.positions_idx_composite.space_usage(),
- self.fast_fields_composite.space_usage(),
+ self.fast_fields_readers.space_usage(),
self.fieldnorms_composite.space_usage(),
self.get_store_reader().space_usage(),
self.delete_bitset_opt
.as_ref()
- .map(|x| x.space_usage())
+ .map(DeleteBitSet::space_usage)
.unwrap_or(0),
)
}
diff --git a/src/directory/ram_directory.rs b/src/directory/ram_directory.rs
index 9851177..804763e 100644
--- a/src/directory/ram_directory.rs
+++ b/src/directory/ram_directory.rs
@@ -86,7 +86,7 @@ impl InnerDirectory {
self.fs
.get(path)
.ok_or_else(|| OpenReadError::FileDoesNotExist(PathBuf::from(path)))
- .map(|el| el.clone())
+ .map(Clone::clone)
}
fn delete(&mut self, path: &Path) -> result::Result<(), DeleteError> {
diff --git a/src/fastfield/bytes/mod.rs b/src/fastfield/bytes/mod.rs
index b3e73a5..0106d3a 100644
--- a/src/fastfield/bytes/mod.rs
+++ b/src/fastfield/bytes/mod.rs
@@ -23,14 +23,14 @@ mod tests {
index_writer.add_document(doc!(field=>vec![0u8; 1000]));
assert!(index_writer.commit().is_ok());
let searcher = index.reader().unwrap().searcher();
- let reader = searcher.segment_reader(0);
- let bytes_reader = reader.bytes_fast_field_reader(field).unwrap();
+ let segment_reader = searcher.segment_reader(0);
+ let bytes_reader = segment_reader.fast_fields().bytes(field).unwrap();
- assert_eq!(bytes_reader.get_val(0), &[0u8, 1, 2, 3]);
- assert!(bytes_reader.get_val(1).is_empty());
- assert_eq!(bytes_reader.get_val(2), &[255u8]);
- assert_eq!(bytes_reader.get_val(3), &[1u8, 3, 5, 7, 9]);
+ assert_eq!(bytes_reader.get_bytes(0), &[0u8, 1, 2, 3]);
+ assert!(bytes_reader.get_bytes(1).is_empty());
+ assert_eq!(bytes_reader.get_bytes(2), &[255u8]);
+ assert_eq!(bytes_reader.get_bytes(3), &[1u8, 3, 5, 7, 9]);
let long = vec![0u8; 1000];
- assert_eq!(bytes_reader.get_val(4), long.as_slice());
+ assert_eq!(bytes_reader.get_bytes(4), long.as_slice());
}
}
diff --git a/src/fastfield/bytes/reader.rs b/src/fastfield/bytes/reader.rs
index 9e4c879..11b6520 100644
--- a/src/fastfield/bytes/reader.rs
+++ b/src/fastfield/bytes/reader.rs
@@ -14,6 +14,7 @@ use DocId;
///
/// Reading the value for a document is done by reading the start index for it,
/// and the start index for the next document, and keeping the bytes in between.
+#[derive(Clone)]
pub struct BytesFastFieldReader {
idx_reader: FastFieldReader<u64>,
values: OwningRef<ReadOnlySource, [u8]>,
@@ -28,10 +29,20 @@ impl BytesFastFieldReader {
BytesFastFieldReader { idx_reader, values }
}
- /// Returns the bytes associated to the given `doc`
- pub fn get_val(&self, doc: DocId) -> &[u8] {
+ fn range(&self, doc: DocId) -> (usize, usize) {
let start = self.idx_reader.get(doc) as usize;
let stop = self.idx_reader.get(doc + 1) as usize;
+ (start, stop)
+ }
+
+ /// Returns the bytes associated to the given `doc`
+ pub fn get_bytes(&self, doc: DocId) -> &[u8] {
+ let (start, stop) = self.range(doc);
&self.values[start..stop]
}
+
+ /// Returns the overall number of bytes in this bytes fast field.
+ pub fn total_num_bytes(&self) -> usize {
+ self.values.len()
+ }
}
diff --git a/src/fastfield/delete.rs b/src/fastfield/delete.rs
index 76ff7e4..d77dcc0 100644
--- a/src/fastfield/delete.rs
+++ b/src/fastfield/delete.rs
@@ -53,16 +53,18 @@ impl DeleteBitSet {
}
}
- /// Returns whether the document has been marked as deleted.
+ /// Returns true iff the document is still "alive". In other words, if it has not been deleted.
+ pub fn is_alive(&self, doc: DocId) -> bool {
+ !self.is_deleted(doc)
+ }
+
+ /// Returns true iff the document has been marked as deleted.
+ #[inline(always)]
pub fn is_deleted(&self, doc: DocId) -> bool {
- if self.len == 0 {
- false
- } else {
- let byte_offset = doc / 8u32;
- let b: u8 = (*self.data)[byte_offset as usize];
- let shift = (doc & 7u32) as u8;
- b & (1u8 << shift) != 0
- }
+ let byte_offset = doc / 8u32;
+ let b: u8 = (*self.data)[byte_offset as usize];
+ let shift = (doc & 7u32) as u8;
+ b & (1u8 << shift) != 0
}
/// Summarize total space usage of this bitset.
diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs
index 7ddea8a..6e5c3b7 100644
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -30,6 +30,7 @@ pub use self::error::{FastFieldNotAvailableError, Result};
pub use self::facet_reader::FacetReader;
pub use self::multivalued::{MultiValueIntFastFieldReader, MultiValueIntFastFieldWriter};
pub use self::reader::FastFieldReader;
+pub use self::readers::FastFieldReaders;
pub use self::serializer::FastFieldSerializer;
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use common;
@@ -43,6 +44,7 @@ mod error;
mod facet_reader;
mod multivalued;
mod reader;
+mod readers;
mod serializer;
mod writer;
@@ -78,10 +80,6 @@ impl FastValue for u64 {
*self
}
- fn as_u64(&self) -> u64 {
- *self
- }
-
fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
match *field_type {
FieldType::U64(ref integer_options) => integer_options.get_fastfield_cardinality(),
@@ -89,6 +87,10 @@ impl FastValue for u64 {
_ => None,
}
}
+
+ fn as_u64(&self) -> u64 {
+ *self
+ }
}
impl FastValue for i64 {
diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs
index 3e2a30e..ad23fb7 100644
--- a/src/fastfield/multivalued/mod.rs
+++ b/src/fastfield/multivalued/mod.rs
@@ -37,9 +37,7 @@ mod tests {
let searcher = index.reader().unwrap().searcher();
let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
- let multi_value_reader = segment_reader
- .multi_fast_field_reader::<u64>(field)
- .unwrap();
+ let multi_value_reader = segment_reader.fast_fields().u64s(field).unwrap();
{
multi_value_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[4u64]);
@@ -198,9 +196,9 @@ mod tests {
assert!(index_writer.commit().is_ok());
let searcher = index.reader().unwrap().searcher();
- let reader = searcher.segment_reader(0);
+ let segment_reader = searcher.segment_reader(0);
let mut vals = Vec::new();
- let multi_value_reader = reader.multi_fast_field_reader::<i64>(field).unwrap();
+ let multi_value_reader = segment_reader.fast_fields().i64s(field).unwrap();
{
multi_value_reader.get_vals(2, &mut vals);
assert_eq!(&vals, &[-4i64]);
diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs
index 3456de5..ee3c615 100644
--- a/src/fastfield/multivalued/reader.rs
+++ b/src/fastfield/multivalued/reader.rs
@@ -26,6 +26,13 @@ impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
}
}
+ pub(crate) fn into_u64s_reader(self) -> MultiValueIntFastFieldReader<u64> {
+ MultiValueIntFastFieldReader {
+ idx_reader: self.idx_reader,
+ vals_reader: self.vals_reader.into_u64_reader(),
+ }
+ }
+
/// Returns `(start, stop)`, such that the values associated
/// to the given document are `start..stop`.
fn range(&self, doc: DocId) -> (u64, u64) {
@@ -41,13 +48,24 @@ impl<Item: FastValue> MultiValueIntFastFieldReader<Item> {
vals.resize(len, Item::default());
self.vals_reader.get_range_u64(start, &mut vals[..]);
}
+
+ /// Returns the number of values associated with the document `DocId`.
+ pub fn num_vals(&self, doc: DocId) -> usize {
+ let (start, stop) = self.range(doc);
+ (stop - start) as usize
+ }
+
+ /// Returns the overall number of values in this field.
+ pub fn total_num_vals(&self) -> u64 {
+ self.idx_reader.max_value()
+ }
}
#[cfg(test)]
mod tests {
use core::Index;
- use schema::{Document, Facet, Schema};
+ use schema::{Facet, Schema};
#[test]
fn test_multifastfield_reader() {
@@ -58,22 +76,12 @@ mod tests {
let mut index_writer = index
.writer_with_num_threads(1, 30_000_000)
.expect("Failed to create index writer.");
- {
- let mut doc = Document::new();
- doc.add_facet(facet_field, "/category/cat2");
- doc.add_facet(facet_field, "/category/cat1");
- index_writer.add_document(doc);
- }
- {
- let mut doc = Document::new();
- doc.add_facet(facet_field, "/category/cat2");
- index_writer.add_document(doc);
- }
- {
- let mut doc = Document::new();
- doc.add_facet(facet_field, "/category/cat3");
- index_writer.add_document(doc);
- }
+ index_writer.add_document(doc!(
+ facet_field => Facet::from("/category/cat2"),
+ facet_field => Facet::from("/category/cat1"),
+ ));
+ index_writer.add_document(doc!(facet_field => Facet::from("/category/cat2")));
+ index_writer.add_document(doc!(facet_field => Facet::from("/category/cat3")));
index_writer.commit().expect("Commit failed");
let searcher = index.reader().unwrap().searcher();
let segment_reader = searcher.segment_reader(0);
diff --git a/src/fastfield/reader.rs b/src/fastfield/reader.rs
index c279427..bba8c24 100644
--- a/src/fastfield/reader.rs
+++ b/src/fastfield/reader.rs
@@ -50,6 +50,15 @@ impl<Item: FastValue> FastFieldReader<Item> {
}
}
+ pub(crate) fn into_u64_reader(self) -> FastFieldReader<u64> {
+ FastFieldReader {
+ bit_unpacker: self.bit_unpacker,
+ min_