summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorfdb-hiroshima <35889323+fdb-hiroshima@users.noreply.github.com>2019-07-27 10:57:33 +0200
committerPaul Masurel <paul.masurel@gmail.com>2019-07-27 17:57:33 +0900
commit6eb4e08636f48091f7783d52c84ef78e7ff1ada4 (patch)
tree2a91b898c007abfdf32e4261950a8625bc2749ca
parentc3231ca252da192e7e164b9b7474a6b99b85d7a7 (diff)
add support for float (#603)
* add basic support for float: as for i64, they are mapped to u64 for indexing; the query parser doesn't work yet * Update value.rs * implement support for float in query parser * Update README.md
-rw-r--r--CHANGELOG.md5
-rw-r--r--README.md4
-rw-r--r--src/collector/int_facet_collector.rs7
-rw-r--r--src/collector/top_score_collector.rs1
-rw-r--r--src/common/mod.rs63
-rw-r--r--src/common/serialize.rs18
-rw-r--r--src/fastfield/mod.rs26
-rw-r--r--src/fastfield/readers.rs38
-rw-r--r--src/fastfield/writer.rs20
-rw-r--r--src/indexer/merger.rs1
-rw-r--r--src/indexer/segment_writer.rs11
-rwxr-xr-xsrc/lib.rs44
-rw-r--r--src/postings/postings_writer.rs3
-rw-r--r--src/query/query_parser/query_grammar.rs4
-rw-r--r--src/query/query_parser/query_parser.rs39
-rw-r--r--src/query/range_query.rs90
-rw-r--r--src/schema/document.rs5
-rw-r--r--src/schema/field_entry.rs20
-rw-r--r--src/schema/field_type.rs18
-rw-r--r--src/schema/flags.rs4
-rw-r--r--src/schema/mod.rs2
-rw-r--r--src/schema/schema.rs54
-rw-r--r--src/schema/term.rs30
-rw-r--r--src/schema/value.rs81
-rw-r--r--src/termdict/mod.rs3
25 files changed, 545 insertions, 46 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 465210f..4f40ff2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,8 @@
+Tantivy 0.11.0
+=====================
+
+- Added f64 field. Internally reuses the u64 code the same way i64 does (@fdb-hiroshima)
+
Tantivy 0.10.0
=====================
diff --git a/README.md b/README.md
index 0441e82..38e3111 100644
--- a/README.md
+++ b/README.md
@@ -50,9 +50,9 @@ performance for different type of queries / collection.
- Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop)
- Mmap directory
- SIMD integer compression when the platform/CPU includes the SSE2 instruction set.
-- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene)
+- Single valued and multivalued u64, i64 and f64 fast fields (equivalent of doc values in Lucene)
- `&[u8]` fast fields
-- Text, i64, u64, dates and hierarchical facet fields
+- Text, i64, u64, f64, dates and hierarchical facet fields
- LZ4 compressed document store
- Range queries
- Faceted search
diff --git a/src/collector/int_facet_collector.rs b/src/collector/int_facet_collector.rs
index 4232343..d9b4c13 100644
--- a/src/collector/int_facet_collector.rs
+++ b/src/collector/int_facet_collector.rs
@@ -82,6 +82,7 @@ mod tests {
let mut schema_builder = schema::Schema::builder();
let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST);
let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST);
+ let num_field_f64 = schema_builder.add_f64_field("num_f64", FAST);
let text_field = schema_builder.add_text_field("text", STRING);
let schema = schema_builder.build();
@@ -94,6 +95,7 @@ mod tests {
index_writer.add_document(doc!(
num_field_i64 => ((i as i64) % 3i64) as i64,
num_field_u64 => (i % 2u64) as u64,
+ num_field_f64 => (i % 4u64) as f64,
text_field => "text"
));
}
@@ -104,10 +106,11 @@ mod tests {
let searcher = index.reader().searcher();
let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64);
let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = IntFacetCollector::new(num_field_u64);
+ let mut ffvf_f64: IntFacetCollector<F64FastFieldReader> = IntFacetCollector::new(num_field_f64);
{
// perform the query
- let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64);
+ let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64).push(&mut ffvf_f64);
let mut query_parser = QueryParser::for_index(index, vec![text_field]);
let query = query_parser.parse_query("text:text").unwrap();
query.search(&searcher, &mut facet_collectors).unwrap();
@@ -117,6 +120,8 @@ mod tests {
assert_eq!(ffvf_u64.counters[&1], 5);
assert_eq!(ffvf_i64.counters[&0], 4);
assert_eq!(ffvf_i64.counters[&1], 3);
+ assert_eq!(ffvf_f64.counters[&0.0], 3);
+ assert_eq!(ffvf_f64.counters[&2.0], 2);
}
}
diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs
index c9b03d0..4ec09ce 100644
--- a/src/collector/top_score_collector.rs
+++ b/src/collector/top_score_collector.rs
@@ -160,6 +160,7 @@ impl TopDocs {
.fast_fields()
.u64(field)
.expect("Field requested is not a i64/u64 fast field.");
+ //TODO error message mismatches actual behavior for i64
move |doc: DocId| ff_reader.get(doc)
})
}
diff --git a/src/common/mod.rs b/src/common/mod.rs
index 8f6deaf..7e41f08 100644
--- a/src/common/mod.rs
+++ b/src/common/mod.rs
@@ -99,15 +99,54 @@ pub fn u64_to_i64(val: u64) -> i64 {
(val ^ HIGHEST_BIT) as i64
}
+/// Maps a `f64` to `u64`
+///
+/// For simplicity, tantivy internally handles `f64` as `u64`.
+/// The mapping is defined by this function.
+///
+/// Maps `f64` to `u64` so that lexical order is preserved.
+///
+/// This is more suited than simply casting (`val as u64`)
+/// which would truncate the result
+///
+/// # See also
+/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html).
+#[inline(always)]
+pub fn f64_to_u64(val: f64) -> u64 {
+ let bits = val.to_bits();
+ if val.is_sign_positive() {
+ bits ^ HIGHEST_BIT
+ } else {
+ !bits
+ }
+}
+
+/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html).
+#[inline(always)]
+pub fn u64_to_f64(val: u64) -> f64 {
+ f64::from_bits(
+ if val & HIGHEST_BIT != 0 {
+ val ^ HIGHEST_BIT
+ } else {
+ !val
+ }
+ )
+}
+
#[cfg(test)]
pub(crate) mod test {
pub use super::serialize::test::fixed_size_test;
- use super::{compute_num_bits, i64_to_u64, u64_to_i64};
+ use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
+ use std::f64;
fn test_i64_converter_helper(val: i64) {
assert_eq!(u64_to_i64(i64_to_u64(val)), val);
}
+
+ fn test_f64_converter_helper(val: f64) {
+ assert_eq!(u64_to_f64(f64_to_u64(val)), val);
+ }
#[test]
fn test_i64_converter() {
@@ -122,6 +161,28 @@ pub(crate) mod test {
}
#[test]
+ fn test_f64_converter() {
+ test_f64_converter_helper(f64::INFINITY);
+ test_f64_converter_helper(f64::NEG_INFINITY);
+ test_f64_converter_helper(0.0);
+ test_f64_converter_helper(-0.0);
+ test_f64_converter_helper(1.0);
+ test_f64_converter_helper(-1.0);
+ }
+
+ #[test]
+ fn test_f64_order() {
+ assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number
+ assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa
+ assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent
+ assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa
+ assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg
+ assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0));
+ assert!(f64_to_u64(-2.0) < f64_to_u64(1.0));
+ assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5));
+ }
+
+ #[test]
fn test_compute_num_bits() {
assert_eq!(compute_num_bits(1), 1u8);
assert_eq!(compute_num_bits(0), 0u8);
diff --git a/src/common/serialize.rs b/src/common/serialize.rs
index 4156115..85a9443 100644
--- a/src/common/serialize.rs
+++ b/src/common/serialize.rs
@@ -102,6 +102,19 @@ impl FixedSize for i64 {
const SIZE_IN_BYTES: usize = 8;
}
+impl BinarySerializable for f64 {
+ fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
+ writer.write_f64::<Endianness>(*self)
+ }
+ fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> {
+ reader.read_f64::<Endianness>()
+ }
+}
+
+impl FixedSize for f64 {
+ const SIZE_IN_BYTES: usize = 8;
+}
+
impl BinarySerializable for u8 {
fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> {
writer.write_u8(*self)
@@ -173,6 +186,11 @@ pub mod test {
}
#[test]
+ fn test_serialize_f64() {
+ fixed_size_test::<f64>();
+ }
+
+ #[test]
fn test_serialize_u64() {
fixed_size_test::<u64>();
}
diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs
index aa5104d..de8f932 100644
--- a/src/fastfield/mod.rs
+++ b/src/fastfield/mod.rs
@@ -48,7 +48,7 @@ mod readers;
mod serializer;
mod writer;
-/// Trait for types that are allowed for fast fields: (u64 or i64).
+/// Trait for types that are allowed for fast fields: (u64, i64 and f64).
pub trait FastValue: Default + Clone + Copy + Send + Sync + PartialOrd {
/// Converts a value from u64
///
@@ -114,11 +114,33 @@ impl FastValue for i64 {
}
}
+impl FastValue for f64 {
+ fn from_u64(val: u64) -> Self {
+ common::u64_to_f64(val)
+ }
+
+ fn to_u64(&self) -> u64 {
+ common::f64_to_u64(*self)
+ }
+
+ fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
+ match *field_type {
+ FieldType::F64(ref integer_options) => integer_options.get_fastfield_cardinality(),
+ _ => None,
+ }
+ }
+
+ fn as_u64(&self) -> u64 {
+ self.to_bits()
+ }
+}
+
fn value_to_u64(value: &Value) -> u64 {
match *value {
Value::U64(ref val) => *val,
Value::I64(ref val) => common::i64_to_u64(*val),
- _ => panic!("Expected a u64/i64 field, got {:?} ", value),
+ Value::F64(ref val) => common::f64_to_u64(*val),
+ _ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
}
}
diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs
index 4019cf3..1eb4ca2 100644
--- a/src/fastfield/readers.rs
+++ b/src/fastfield/readers.rs
@@ -14,8 +14,10 @@ use std::collections::HashMap;
pub struct FastFieldReaders {
fast_field_i64: HashMap<Field, FastFieldReader<i64>>,
fast_field_u64: HashMap<Field, FastFieldReader<u64>>,
+ fast_field_f64: HashMap<Field, FastFieldReader<f64>>,
fast_field_i64s: HashMap<Field, MultiValueIntFastFieldReader<i64>>,
fast_field_u64s: HashMap<Field, MultiValueIntFastFieldReader<u64>>,
+ fast_field_f64s: HashMap<Field, MultiValueIntFastFieldReader<f64>>,
fast_bytes: HashMap<Field, BytesFastFieldReader>,
fast_fields_composite: CompositeFile,
}
@@ -23,6 +25,7 @@ pub struct FastFieldReaders {
enum FastType {
I64,
U64,
+ F64,
}
fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> {
@@ -33,6 +36,9 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality
FieldType::I64(options) => options
.get_fastfield_cardinality()
.map(|cardinality| (FastType::I64, cardinality)),
+ FieldType::F64(options) => options
+ .get_fastfield_cardinality()
+ .map(|cardinality| (FastType::F64, cardinality)),
FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)),
_ => None,
}
@@ -46,8 +52,10 @@ impl FastFieldReaders {
let mut fast_field_readers = FastFieldReaders {
fast_field_i64: Default::default(),
fast_field_u64: Default::default(),
+ fast_field_f64: Default::default(),
fast_field_i64s: Default::default(),
fast_field_u64s: Default::default(),
+ fast_field_f64s: Default::default(),
fast_bytes: Default::default(),
fast_fields_composite: fast_fields_composite.clone(),
};
@@ -82,6 +90,12 @@ impl FastFieldReaders {
FastFieldReader::open(fast_field_data.clone()),
);
}
+ FastType::F64 => {
+ fast_field_readers.fast_field_f64.insert(
+ field,
+ FastFieldReader::open(fast_field_data.clone()),
+ );
+ }
}
} else {
return Err(From::from(FastFieldNotAvailableError::new(field_entry)));
@@ -109,6 +123,14 @@ impl FastFieldReaders {
.fast_field_u64s
.insert(field, multivalued_int_fast_field);
}
+ FastType::F64 => {
+ let vals_reader = FastFieldReader::open(fast_field_data);
+ let multivalued_int_fast_field =
+ MultiValueIntFastFieldReader::open(idx_reader, vals_reader);
+ fast_field_readers
+ .fast_field_f64s
+ .insert(field, multivalued_int_fast_field);
+ }
}
} else {
return Err(From::from(FastFieldNotAvailableError::new(field_entry)));
@@ -135,6 +157,8 @@ impl FastFieldReaders {
/// If the field is a i64-fast field, return the associated u64 reader. Values are
/// mapped from i64 to u64 using a (well the, it is unique) monotonic mapping. ///
///
+ ///TODO should it also be lenient with f64?
+ ///
/// This method is useful when merging segment reader.
pub(crate) fn u64_lenient(&self, field: Field) -> Option<FastFieldReader<u64>> {
if let Some(u64_ff_reader) = self.u64(field) {
@@ -153,6 +177,13 @@ impl FastFieldReaders {
self.fast_field_i64.get(&field).cloned()
}
+ /// Returns the `f64` fast field reader associated to `field`.
+ ///
+ /// If `field` is not a f64 fast field, this method returns `None`.
+ pub fn f64(&self, field: Field) -> Option<FastFieldReader<f64>> {
+ self.fast_field_f64.get(&field).cloned()
+ }
+
/// Returns a `u64s` multi-valued fast field reader reader associated to `field`.
///
/// If `field` is not a u64 multi-valued fast field, this method returns `None`.
@@ -182,6 +213,13 @@ impl FastFieldReaders {
self.fast_field_i64s.get(&field).cloned()
}
+ /// Returns a `f64s` multi-valued fast field reader associated to `field`.
+ ///
+ /// If `field` is not a f64 multi-valued fast field, this method returns `None`.
+ pub fn f64s(&self, field: Field) -> Option<MultiValueIntFastFieldReader<f64>> {
+ self.fast_field_f64s.get(&field).cloned()
+ }
+
/// Returns the `bytes` fast field reader associated to `field`.
///
/// If `field` is not a bytes fast field, returns `None`.
diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs
index a9d9120..f1817f6 100644
--- a/src/fastfield/writer.rs
+++ b/src/fastfield/writer.rs
@@ -25,13 +25,13 @@ impl FastFieldsWriter {
for (field_id, field_entry) in schema.fields().iter().enumerate() {
let field = Field(field_id as u32);
- let default_value = if let FieldType::I64(_) = *field_entry.field_type() {
- common::i64_to_u64(0i64)
- } else {
- 0u64
+ let default_value = match *field_entry.field_type() {
+ FieldType::I64(_) => common::i64_to_u64(0i64),
+ FieldType::F64(_) => common::f64_to_u64(0.0f64),
+ _ => 0u64,
};
match *field_entry.field_type() {
- FieldType::I64(ref int_options) | FieldType::U64(ref int_options) => {
+ FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => {
match int_options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
let mut fast_field_writer = IntFastFieldWriter::new(field);
@@ -142,9 +142,9 @@ impl FastFieldsWriter {
/// bitpacked and the number of bits required for bitpacking
/// can only been known once we have seen all of the values.
///
-/// Both u64, and i64 use the same writer.
-/// i64 are just remapped to the `0..2^64 - 1`
-/// using `common::i64_to_u64`.
+/// u64, i64 and f64 all use the same writer.
+/// i64 and f64 are just remapped to the `0..2^64 - 1`
+/// using `common::i64_to_u64` and `common::f64_to_u64`.
pub struct IntFastFieldWriter {
field: Field,
vals: Vec<u8>,
@@ -203,8 +203,8 @@ impl IntFastFieldWriter {
/// Extract the value associated to the fast field for
/// this document.
///
- /// i64 are remapped to u64 using the logic
- /// in `common::i64_to_u64`.
+ /// i64 and f64 are remapped to u64 using the logic
+ /// in `common::i64_to_u64` and `common::f64_to_u64`.
///
/// If the value is missing, then the default value is used
/// instead.
diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs
index d01f351..be38c0a 100644
--- a/src/indexer/merger.rs
+++ b/src/indexer/merger.rs
@@ -207,6 +207,7 @@ impl IndexMerger {
}
FieldType::U64(ref options)
| FieldType::I64(ref options)
+ | FieldType::F64(ref options)
| FieldType::Date(ref options) => match options.get_fastfield_cardinality() {
Some(Cardinality::SingleValue) => {
self.write_single_fast_field(field, fast_field_serializer)?;
diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs
index b4c54c2..4dd73df 100644
--- a/src/indexer/segment_writer.rs
+++ b/src/indexer/segment_writer.rs
@@ -214,6 +214,17 @@ impl SegmentWriter {
}
}
}
+ FieldType::F64(ref int_option) => {
+ if int_option.is_indexed() {
+ for field_value in field_values {
+ let term = Term::from_field_f64(
+ field_value.field(),
+ field_value.value().f64_value(),
+ );
+ self.multifield_postings.subscribe(doc_id, &term);
+ }
+ }
+ }
FieldType::Bytes => {
// Do nothing. Bytes only supports fast fields.
}
diff --git a/src/lib.rs b/src/lib.rs
index be29b62..b423f43 100755
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -179,7 +179,7 @@ pub use crate::indexer::IndexWriter;
pub use crate::postings::Postings;
pub use crate::schema::{Document, Term};
-pub use crate::common::{i64_to_u64, u64_to_i64};
+pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64};
/// Expose the current version of tantivy, as well
/// whether it was compiled with the simd compression.
@@ -626,6 +626,30 @@ mod tests {
}
#[test]
+ fn test_indexed_f64() {
+ let mut schema_builder = Schema::builder();
+ let value_field = schema_builder.add_f64_field("value", INDEXED);
+ let schema = schema_builder.build();
+
+ let index = Index::create_in_ram(schema);
+ let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap();
+ let val = std::f64::consts::PI;
+ index_writer.add_document(doc!(value_field => val));
+ index_writer.commit().unwrap();
+ let reader = index.reader().unwrap();
+ let searcher = reader.searcher();
+ let term = Term::from_field_f64(value_field, val);
+ let mut postings = searcher
+ .segment_reader(0)
+ .inverted_index(term.field())
+ .read_postings(&term, IndexRecordOption::Basic)
+ .unwrap();
+ assert!(postings.advance());
+ assert_eq!(postings.doc(), 0);
+ assert!(!postings.advance());
+ }
+
+ #[test]
fn test_indexedfield_not_in_documents() {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", TEXT);
@@ -817,6 +841,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST);
let fast_field_signed = schema_builder.add_i64_field("signed", FAST);
+ let fast_field_float = schema_builder.add_f64_field("float", FAST);
let text_field = schema_builder.add_text_field("text", TEXT);
let stored_int_field = schema_builder.add_u64_field("text", STORED);
let schema = schema_builder.build();
@@ -824,7 +849,7 @@ mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap();
{
- let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64);
+ let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64);
index_writer.add_document(document);
index_writer.commit().unwrap();
}
@@ -844,10 +869,14 @@ mod tests {
assert!(fast_field_reader_opt.is_none());
}
{
- let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed);
+ let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_float);
+ assert!(fast_field_reader_opt.is_none());
+ }
+ {
+ let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned);
assert!(fast_field_reader_opt.is_some());
let fast_field_reader = fast_field_reader_opt.unwrap();
- assert_eq!(fast_field_reader.get(0), 4i64)
+ assert_eq!(fast_field_reader.get(0), 4u64)
}
{
@@ -856,5 +885,12 @@ mod tests {
let fast_field_reader = fast_field_reader_opt.unwrap();
assert_eq!(fast_field_reader.get(0), 4i64)
}
+
+ {
+ let fast_field_reader_opt = segment_reader.fast_fields().f64(fast_field_float);
+ assert!(fast_field_reader_opt.is_some());
+ let fast_field_reader = fast_field_reader_opt.unwrap();
+ assert_eq!(fast_field_reader.get(0), 4f64)
+ }
}
}
diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs
index bff89e4..9b6e007 100644
--- a/src/postings/postings_writer.rs
+++ b/src/postings/postings_writer.rs
@@ -35,6 +35,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter>
.unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()),
FieldType::U64(_)
| FieldType::I64(_)
+ | FieldType::F64(_)
| FieldType::Date(_)
| FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(),
FieldType::Bytes => {
@@ -154,7 +155,7 @@ impl MultiFieldPostingsWriter {
.collect();
unordered_term_mappings.insert(field, mapping);
}
- FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => {}
+ FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | FieldType::Date(_) => {}
FieldType::Bytes => {}
}
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index 6a968c6..a3df714 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -20,7 +20,7 @@ parser! {
parser! {
fn word[I]()(I) -> String
where [I: Stream<Item = char>] {
- many1(satisfy(char::is_alphanumeric))
+ many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
.and_then(|s: String| {
match s.as_str() {
"OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
@@ -266,6 +266,7 @@ mod test {
test_parse_query_to_ast_helper("(+a)", "+(\"a\")");
test_parse_query_to_ast_helper("(+a +b)", "(+(\"a\") +(\"b\"))");
test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\"");
+ test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\"");
test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")");
test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+(abc:\"toto\") -(\"titi\"))");
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
@@ -277,6 +278,7 @@ mod test {
test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
+ test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
test_is_parse_err("abc + ");
}
}
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index bef73ca..ffa7b20 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -18,7 +18,7 @@ use crate::schema::{FieldType, Term};
use crate::tokenizer::TokenizerManager;
use combine::Parser;
use std::borrow::Cow;
-use std::num::ParseIntError;
+use std::num::{ParseIntError, ParseFloatError};
use std::ops::Bound;
use std::str::FromStr;
@@ -30,9 +30,12 @@ pub enum QueryParserError {
/// `FieldDoesNotExist(field_name: String)`
/// The query references a field that is not in the schema
FieldDoesNotExist(String),
- /// The query contains a term for a `u64`-field, but the value
- /// is not a u64.
+ /// The query contains a term for a `u64` or `i64`-field, but the value
+ /// is neither.
ExpectedInt(ParseIntError),
+ /// The query contains a term for a `f64`-field, but the value
+ /// is not a f64.
+ ExpectedFloat(ParseFloatError),
/// It is forbidden queries that are only "excluding". (e.g. -title:pop)
AllButQueryForbidden,
/// If no default field is declared, running a query without any
@@ -60,6 +63,12 @@ impl From<ParseIntError> for QueryParserError {
}
}
+impl From<ParseFloatError> for QueryParserError {
+ fn from(err: ParseFloatError) -> QueryParserError {
+ QueryParserError::ExpectedFloat(err)
+ }
+}
+
impl From<chrono::ParseError> for QueryParserError {
fn from(err: chrono::ParseError) -> QueryParserError {
QueryParserError::DateFormatError(err)
@@ -239,6 +248,11 @@ impl QueryParser {
let term = Term::from_field_i64(field, val);
Ok(vec![(0, term)])
}
+ FieldType::F64(_) => {
+ let val: f64 = f64::from_str(phrase)?;
+ let term = Term::from_field_f64(field, val);
+ Ok(vec![(0, term)])
+ }
FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) {
Ok(x) => Ok(vec![(
0,
@@ -529,6 +543,7 @@ mod test {
schema_builder.add_text_field("nottokenized", STRING);
schema_builder.add_text_field("with_stop_words", text_options);
schema_builder.add_date_field("date", INDEXED);
+ schema_builder.add_f64_field("float", INDEXED);
let schema = schema_builder.build();
let default_fields = vec![title, text];
let tokenizer_manager = TokenizerManager::default();
@@ -634,6 +649,13 @@ mod test {
assert!(query_parser
.parse_query("unsigned:\"18446744073709551615\"")
.is_ok());
+ assert!(query_parser.parse_query("float:\"3.1\"").is_ok());
+ assert!(query_parser.parse_query("float:\"-2.4\"").is_ok());
+ assert!(query_parser.parse_query("float:\"2.1.2\"").is_err());
+ assert!(query_parser.parse_query("float:\"2.1a\"").is_err());
+ assert!(query_parser
+ .parse_query("float:\"18446744073709551615.0\"")
+ .is_ok());
test_parse_query_to_logical_ast_helper(
"unsigned:2324",
"Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])",
@@ -645,6 +667,12 @@ mod test {
&format!("{:?}", Term::from_field_i64(Field(2u32), -2324)),
false,
);
+
+ test_parse_query_to_logical_ast_helper(
+ "float:2.5",
+ &format!("{:?}", Term::from_field_f64(Field(10u32), 2.5)),
+ false,
+ );
}
#[test]
@@ -786,6 +814,11 @@ mod test {
query_parser.parse_query("signed:18b"),
Err(QueryParserError::ExpectedInt(_))
);
+ assert!(query_parser.parse_query("float:\"1.8\"").is_ok());
+ assert_matches!(
+ query_parser.parse_query("float:1.8a"),
+ Err(QueryParserError::ExpectedFloat(_))
+ );
}
#[test]
diff --git a/src/query/range_query.rs b/src/query/range_query.rs
index 1ec9d20..76a0c15 100644
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -142,6 +142,39 @@ impl RangeQuery {
}
}
+ /// Creates a new `RangeQuery` over a `f64` field.
+ ///
+ /// If the field is not of the type `f64`, tantivy
+ /// will panic when the `Weight` object is created.
+ pub fn new_f64(field: Field, range: Range<f64>) -> RangeQuery {
+ RangeQuery::new_f64_bounds(
+ field,
+ Bound::Included(range.start),
+ Bound::Excluded(range.end),
+ )
+ }
+
+ /// Create a new `RangeQuery` over a `f64` field.
+ ///
+ /// The two `Bound` arguments make it possible to create more complex
+ /// ranges than semi-inclusive range.
+ ///
+ /// If the field is not of the type `f64`, tantivy
+ /// will panic when the `Weight` object is created.
+ pub fn new_f64_bounds(
+ field: Field,
+ left_bound: Bound<f64>,
+ right_bound: Bound<f64>,
+ ) -> RangeQuery {
+ let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned();
+ RangeQuery {
+ field,
+ value_type: Type::F64,
+ left_bound: map_bound(&left_bound, &make_term_val),
+ right_bound: map_bound(&right_bound, &make_term_val),
+ }
+ }
+
/// Create a new `RangeQuery` over a `u64` field.
///
/// The two `Bound` arguments make it possible to create more complex
@@ -397,4 +430,61 @@ mod tests {
);
}
+ #[test]
+ fn test_range_float() {
+ let float_field: Field;
+ let schema = {
+ let mut schema_builder = Schema::builder();
+ float_field = schema_builder.add_f64_field("floatfield", INDEXED);
+ schema_builder.build()
+ };
+
+ let index = Index::create_in_ram(schema);
+ {
+ let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap();
+
+ for i in 1..100 {