diff options
author | fdb-hiroshima <35889323+fdb-hiroshima@users.noreply.github.com> | 2019-07-27 10:57:33 +0200 |
---|---|---|
committer | Paul Masurel <paul.masurel@gmail.com> | 2019-07-27 17:57:33 +0900 |
commit | 6eb4e08636f48091f7783d52c84ef78e7ff1ada4 (patch) | |
tree | 2a91b898c007abfdf32e4261950a8625bc2749ca | |
parent | c3231ca252da192e7e164b9b7474a6b99b85d7a7 (diff) |
add support for float (#603)
* add basic support for float
as for i64, they are mapped to u64 for indexing
query parser doesn't work yet
* Update value.rs
* implement support for float in query parser
* Update README.md
-rw-r--r-- | CHANGELOG.md | 5 | ||||
-rw-r--r-- | README.md | 4 | ||||
-rw-r--r-- | src/collector/int_facet_collector.rs | 7 | ||||
-rw-r--r-- | src/collector/top_score_collector.rs | 1 | ||||
-rw-r--r-- | src/common/mod.rs | 63 | ||||
-rw-r--r-- | src/common/serialize.rs | 18 | ||||
-rw-r--r-- | src/fastfield/mod.rs | 26 | ||||
-rw-r--r-- | src/fastfield/readers.rs | 38 | ||||
-rw-r--r-- | src/fastfield/writer.rs | 20 | ||||
-rw-r--r-- | src/indexer/merger.rs | 1 | ||||
-rw-r--r-- | src/indexer/segment_writer.rs | 11 | ||||
-rwxr-xr-x | src/lib.rs | 44 | ||||
-rw-r--r-- | src/postings/postings_writer.rs | 3 | ||||
-rw-r--r-- | src/query/query_parser/query_grammar.rs | 4 | ||||
-rw-r--r-- | src/query/query_parser/query_parser.rs | 39 | ||||
-rw-r--r-- | src/query/range_query.rs | 90 | ||||
-rw-r--r-- | src/schema/document.rs | 5 | ||||
-rw-r--r-- | src/schema/field_entry.rs | 20 | ||||
-rw-r--r-- | src/schema/field_type.rs | 18 | ||||
-rw-r--r-- | src/schema/flags.rs | 4 | ||||
-rw-r--r-- | src/schema/mod.rs | 2 | ||||
-rw-r--r-- | src/schema/schema.rs | 54 | ||||
-rw-r--r-- | src/schema/term.rs | 30 | ||||
-rw-r--r-- | src/schema/value.rs | 81 | ||||
-rw-r--r-- | src/termdict/mod.rs | 3 |
25 files changed, 545 insertions, 46 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 465210f..4f40ff2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +Tantivy 0.11.0 +===================== + +- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima) + Tantivy 0.10.0 ===================== @@ -50,9 +50,9 @@ performance for different type of queries / collection. - Multithreaded indexing (indexing English Wikipedia takes < 3 minutes on my desktop) - Mmap directory - SIMD integer compression when the platform/CPU includes the SSE2 instruction set. -- Single valued and multivalued u64 and i64 fast fields (equivalent of doc values in Lucene) +- Single valued and multivalued u64, i64 and f64 fast fields (equivalent of doc values in Lucene) - `&[u8]` fast fields -- Text, i64, u64, dates and hierarchical facet fields +- Text, i64, u64, f64, dates and hierarchical facet fields - LZ4 compressed document store - Range queries - Faceted search diff --git a/src/collector/int_facet_collector.rs b/src/collector/int_facet_collector.rs index 4232343..d9b4c13 100644 --- a/src/collector/int_facet_collector.rs +++ b/src/collector/int_facet_collector.rs @@ -82,6 +82,7 @@ mod tests { let mut schema_builder = schema::Schema::builder(); let num_field_i64 = schema_builder.add_i64_field("num_i64", FAST); let num_field_u64 = schema_builder.add_u64_field("num_u64", FAST); + let num_field_f64 = schema_builder.add_f64_field("num_f64", FAST); let text_field = schema_builder.add_text_field("text", STRING); let schema = schema_builder.build(); @@ -94,6 +95,7 @@ mod tests { index_writer.add_document(doc!( num_field_i64 => ((i as i64) % 3i64) as i64, num_field_u64 => (i % 2u64) as u64, + num_field_f64 => (i % 4u64) as f64, text_field => "text" )); } @@ -104,10 +106,11 @@ mod tests { let searcher = index.reader().searcher(); let mut ffvf_i64: IntFacetCollector<I64FastFieldReader> = IntFacetCollector::new(num_field_i64); let mut ffvf_u64: IntFacetCollector<U64FastFieldReader> = 
IntFacetCollector::new(num_field_u64); + let mut ffvf_f64: IntFacetCollector<F64FastFieldReader> = IntFacetCollector::new(num_field_f64); { // perform the query - let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64); + let mut facet_collectors = chain().push(&mut ffvf_i64).push(&mut ffvf_u64).push(&mut ffvf_f64); let mut query_parser = QueryParser::for_index(index, vec![text_field]); let query = query_parser.parse_query("text:text").unwrap(); query.search(&searcher, &mut facet_collectors).unwrap(); @@ -117,6 +120,8 @@ mod tests { assert_eq!(ffvf_u64.counters[&1], 5); assert_eq!(ffvf_i64.counters[&0], 4); assert_eq!(ffvf_i64.counters[&1], 3); + assert_eq!(ffvf_f64.counters[&0.0], 3); + assert_eq!(ffvf_f64.counters[&2.0], 2); } } diff --git a/src/collector/top_score_collector.rs b/src/collector/top_score_collector.rs index c9b03d0..4ec09ce 100644 --- a/src/collector/top_score_collector.rs +++ b/src/collector/top_score_collector.rs @@ -160,6 +160,7 @@ impl TopDocs { .fast_fields() .u64(field) .expect("Field requested is not a i64/u64 fast field."); + //TODO error message missmatch actual behavior for i64 move |doc: DocId| ff_reader.get(doc) }) } diff --git a/src/common/mod.rs b/src/common/mod.rs index 8f6deaf..7e41f08 100644 --- a/src/common/mod.rs +++ b/src/common/mod.rs @@ -99,15 +99,54 @@ pub fn u64_to_i64(val: u64) -> i64 { (val ^ HIGHEST_BIT) as i64 } +/// Maps a `f64` to `u64` +/// +/// For simplicity, tantivy internally handles `f64` as `u64`. +/// The mapping is defined by this function. +/// +/// Maps `f64` to `u64` so that lexical order is preserved. +/// +/// This is more suited than simply casting (`val as u64`) +/// which would truncate the result +/// +/// # See also +/// The [reverse mapping is `u64_to_f64`](./fn.u64_to_f64.html). 
+#[inline(always)] +pub fn f64_to_u64(val: f64) -> u64 { + let bits = val.to_bits(); + if val.is_sign_positive() { + bits ^ HIGHEST_BIT + } else { + !bits + } +} + +/// Reverse the mapping given by [`f64_to_u64`](./fn.f64_to_u64.html). +#[inline(always)] +pub fn u64_to_f64(val: u64) -> f64 { + f64::from_bits( + if val & HIGHEST_BIT != 0 { + val ^ HIGHEST_BIT + } else { + !val + } + ) +} + #[cfg(test)] pub(crate) mod test { pub use super::serialize::test::fixed_size_test; - use super::{compute_num_bits, i64_to_u64, u64_to_i64}; + use super::{compute_num_bits, i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64}; + use std::f64; fn test_i64_converter_helper(val: i64) { assert_eq!(u64_to_i64(i64_to_u64(val)), val); } + + fn test_f64_converter_helper(val: f64) { + assert_eq!(u64_to_f64(f64_to_u64(val)), val); + } #[test] fn test_i64_converter() { @@ -122,6 +161,28 @@ pub(crate) mod test { } #[test] + fn test_f64_converter() { + test_f64_converter_helper(f64::INFINITY); + test_f64_converter_helper(f64::NEG_INFINITY); + test_f64_converter_helper(0.0); + test_f64_converter_helper(-0.0); + test_f64_converter_helper(1.0); + test_f64_converter_helper(-1.0); + } + + #[test] + fn test_f64_order() { + assert!(!(f64_to_u64(f64::NEG_INFINITY)..f64_to_u64(f64::INFINITY)).contains(&f64_to_u64(f64::NAN))); //nan is not a number + assert!(f64_to_u64(1.5) > f64_to_u64(1.0)); //same exponent, different mantissa + assert!(f64_to_u64(2.0) > f64_to_u64(1.0)); //same mantissa, different exponent + assert!(f64_to_u64(2.0) > f64_to_u64(1.5)); //different exponent and mantissa + assert!(f64_to_u64(1.0) > f64_to_u64(-1.0)); // pos > neg + assert!(f64_to_u64(-1.5) < f64_to_u64(-1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(1.0)); + assert!(f64_to_u64(-2.0) < f64_to_u64(-1.5)); + } + + #[test] fn test_compute_num_bits() { assert_eq!(compute_num_bits(1), 1u8); assert_eq!(compute_num_bits(0), 0u8); diff --git a/src/common/serialize.rs b/src/common/serialize.rs index 4156115..85a9443 100644 --- 
a/src/common/serialize.rs +++ b/src/common/serialize.rs @@ -102,6 +102,19 @@ impl FixedSize for i64 { const SIZE_IN_BYTES: usize = 8; } +impl BinarySerializable for f64 { + fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { + writer.write_f64::<Endianness>(*self) + } + fn deserialize<R: Read>(reader: &mut R) -> io::Result<Self> { + reader.read_f64::<Endianness>() + } +} + +impl FixedSize for f64 { + const SIZE_IN_BYTES: usize = 8; +} + impl BinarySerializable for u8 { fn serialize<W: Write>(&self, writer: &mut W) -> io::Result<()> { writer.write_u8(*self) @@ -173,6 +186,11 @@ pub mod test { } #[test] + fn test_serialize_f64() { + fixed_size_test::<f64>(); + } + + #[test] fn test_serialize_u64() { fixed_size_test::<u64>(); } diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index aa5104d..de8f932 100644 --- a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -48,7 +48,7 @@ mod readers; mod serializer; mod writer; -/// Trait for types that are allowed for fast fields: (u64 or i64). +/// Trait for types that are allowed for fast fields: (u64, i64 and f64). 
pub trait FastValue: Default + Clone + Copy + Send + Sync + PartialOrd { /// Converts a value from u64 /// @@ -114,11 +114,33 @@ impl FastValue for i64 { } } +impl FastValue for f64 { + fn from_u64(val: u64) -> Self { + common::u64_to_f64(val) + } + + fn to_u64(&self) -> u64 { + common::f64_to_u64(*self) + } + + fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> { + match *field_type { + FieldType::F64(ref integer_options) => integer_options.get_fastfield_cardinality(), + _ => None, + } + } + + fn as_u64(&self) -> u64 { + self.to_bits() + } +} + fn value_to_u64(value: &Value) -> u64 { match *value { Value::U64(ref val) => *val, Value::I64(ref val) => common::i64_to_u64(*val), - _ => panic!("Expected a u64/i64 field, got {:?} ", value), + Value::F64(ref val) => common::f64_to_u64(*val), + _ => panic!("Expected a u64/i64/f64 field, got {:?} ", value), } } diff --git a/src/fastfield/readers.rs b/src/fastfield/readers.rs index 4019cf3..1eb4ca2 100644 --- a/src/fastfield/readers.rs +++ b/src/fastfield/readers.rs @@ -14,8 +14,10 @@ use std::collections::HashMap; pub struct FastFieldReaders { fast_field_i64: HashMap<Field, FastFieldReader<i64>>, fast_field_u64: HashMap<Field, FastFieldReader<u64>>, + fast_field_f64: HashMap<Field, FastFieldReader<f64>>, fast_field_i64s: HashMap<Field, MultiValueIntFastFieldReader<i64>>, fast_field_u64s: HashMap<Field, MultiValueIntFastFieldReader<u64>>, + fast_field_f64s: HashMap<Field, MultiValueIntFastFieldReader<f64>>, fast_bytes: HashMap<Field, BytesFastFieldReader>, fast_fields_composite: CompositeFile, } @@ -23,6 +25,7 @@ pub struct FastFieldReaders { enum FastType { I64, U64, + F64, } fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality)> { @@ -33,6 +36,9 @@ fn type_and_cardinality(field_type: &FieldType) -> Option<(FastType, Cardinality FieldType::I64(options) => options .get_fastfield_cardinality() .map(|cardinality| (FastType::I64, cardinality)), + FieldType::F64(options) => 
options + .get_fastfield_cardinality() + .map(|cardinality| (FastType::F64, cardinality)), FieldType::HierarchicalFacet => Some((FastType::U64, Cardinality::MultiValues)), _ => None, } @@ -46,8 +52,10 @@ impl FastFieldReaders { let mut fast_field_readers = FastFieldReaders { fast_field_i64: Default::default(), fast_field_u64: Default::default(), + fast_field_f64: Default::default(), fast_field_i64s: Default::default(), fast_field_u64s: Default::default(), + fast_field_f64s: Default::default(), fast_bytes: Default::default(), fast_fields_composite: fast_fields_composite.clone(), }; @@ -82,6 +90,12 @@ impl FastFieldReaders { FastFieldReader::open(fast_field_data.clone()), ); } + FastType::F64 => { + fast_field_readers.fast_field_f64.insert( + field, + FastFieldReader::open(fast_field_data.clone()), + ); + } } } else { return Err(From::from(FastFieldNotAvailableError::new(field_entry))); @@ -109,6 +123,14 @@ impl FastFieldReaders { .fast_field_u64s .insert(field, multivalued_int_fast_field); } + FastType::F64 => { + let vals_reader = FastFieldReader::open(fast_field_data); + let multivalued_int_fast_field = + MultiValueIntFastFieldReader::open(idx_reader, vals_reader); + fast_field_readers + .fast_field_f64s + .insert(field, multivalued_int_fast_field); + } } } else { return Err(From::from(FastFieldNotAvailableError::new(field_entry))); @@ -135,6 +157,8 @@ impl FastFieldReaders { /// If the field is a i64-fast field, return the associated u64 reader. Values are /// mapped from i64 to u64 using a (well the, it is unique) monotonic mapping. /// /// + ///TODO should it also be lenient with f64? + /// /// This method is useful when merging segment reader. pub(crate) fn u64_lenient(&self, field: Field) -> Option<FastFieldReader<u64>> { if let Some(u64_ff_reader) = self.u64(field) { @@ -153,6 +177,13 @@ impl FastFieldReaders { self.fast_field_i64.get(&field).cloned() } + /// Returns the `f64` fast field reader reader associated to `field`. 
+ /// + /// If `field` is not a f64 fast field, this method returns `None`. + pub fn f64(&self, field: Field) -> Option<FastFieldReader<f64>> { + self.fast_field_f64.get(&field).cloned() + } + /// Returns a `u64s` multi-valued fast field reader reader associated to `field`. /// /// If `field` is not a u64 multi-valued fast field, this method returns `None`. @@ -182,6 +213,13 @@ impl FastFieldReaders { self.fast_field_i64s.get(&field).cloned() } + /// Returns a `f64s` multi-valued fast field reader reader associated to `field`. + /// + /// If `field` is not a f64 multi-valued fast field, this method returns `None`. + pub fn f64s(&self, field: Field) -> Option<MultiValueIntFastFieldReader<f64>> { + self.fast_field_f64s.get(&field).cloned() + } + /// Returns the `bytes` fast field reader associated to `field`. /// /// If `field` is not a bytes fast field, returns `None`. diff --git a/src/fastfield/writer.rs b/src/fastfield/writer.rs index a9d9120..f1817f6 100644 --- a/src/fastfield/writer.rs +++ b/src/fastfield/writer.rs @@ -25,13 +25,13 @@ impl FastFieldsWriter { for (field_id, field_entry) in schema.fields().iter().enumerate() { let field = Field(field_id as u32); - let default_value = if let FieldType::I64(_) = *field_entry.field_type() { - common::i64_to_u64(0i64) - } else { - 0u64 + let default_value = match *field_entry.field_type() { + FieldType::I64(_) => common::i64_to_u64(0i64), + FieldType::F64(_) => common::f64_to_u64(0.0f64), + _ => 0u64, }; match *field_entry.field_type() { - FieldType::I64(ref int_options) | FieldType::U64(ref int_options) => { + FieldType::I64(ref int_options) | FieldType::U64(ref int_options) | FieldType::F64(ref int_options) => { match int_options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { let mut fast_field_writer = IntFastFieldWriter::new(field); @@ -142,9 +142,9 @@ impl FastFieldsWriter { /// bitpacked and the number of bits required for bitpacking /// can only been known once we have seen all of the values. 
/// -/// Both u64, and i64 use the same writer. -/// i64 are just remapped to the `0..2^64 - 1` -/// using `common::i64_to_u64`. +/// Both u64, i64 and f64 use the same writer. +/// i64 and f64 are just remapped to the `0..2^64 - 1` +/// using `common::i64_to_u64` and `common::f64_to_u64`. pub struct IntFastFieldWriter { field: Field, vals: Vec<u8>, @@ -203,8 +203,8 @@ impl IntFastFieldWriter { /// Extract the value associated to the fast field for /// this document. /// - /// i64 are remapped to u64 using the logic - /// in `common::i64_to_u64`. + /// i64 and f64 are remapped to u64 using the logic + /// in `common::i64_to_u64` and `common::f64_to_u64`. /// /// If the value is missing, then the default value is used /// instead. diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index d01f351..be38c0a 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -207,6 +207,7 @@ impl IndexMerger { } FieldType::U64(ref options) | FieldType::I64(ref options) + | FieldType::F64(ref options) | FieldType::Date(ref options) => match options.get_fastfield_cardinality() { Some(Cardinality::SingleValue) => { self.write_single_fast_field(field, fast_field_serializer)?; diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index b4c54c2..4dd73df 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -214,6 +214,17 @@ impl SegmentWriter { } } } + FieldType::F64(ref int_option) => { + if int_option.is_indexed() { + for field_value in field_values { + let term = Term::from_field_f64( + field_value.field(), + field_value.value().f64_value(), + ); + self.multifield_postings.subscribe(doc_id, &term); + } + } + } FieldType::Bytes => { // Do nothing. Bytes only supports fast fields. 
} @@ -179,7 +179,7 @@ pub use crate::indexer::IndexWriter; pub use crate::postings::Postings; pub use crate::schema::{Document, Term}; -pub use crate::common::{i64_to_u64, u64_to_i64}; +pub use crate::common::{i64_to_u64, u64_to_i64, f64_to_u64, u64_to_f64}; /// Expose the current version of tantivy, as well /// whether it was compiled with the simd compression. @@ -626,6 +626,30 @@ mod tests { } #[test] + fn test_indexed_f64() { + let mut schema_builder = Schema::builder(); + let value_field = schema_builder.add_f64_field("value", INDEXED); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + let mut index_writer = index.writer_with_num_threads(1, 3_000_000).unwrap(); + let val = std::f64::consts::PI; + index_writer.add_document(doc!(value_field => val)); + index_writer.commit().unwrap(); + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + let term = Term::from_field_f64(value_field, val); + let mut postings = searcher + .segment_reader(0) + .inverted_index(term.field()) + .read_postings(&term, IndexRecordOption::Basic) + .unwrap(); + assert!(postings.advance()); + assert_eq!(postings.doc(), 0); + assert!(!postings.advance()); + } + + #[test] fn test_indexedfield_not_in_documents() { let mut schema_builder = Schema::builder(); let text_field = schema_builder.add_text_field("text", TEXT); @@ -817,6 +841,7 @@ mod tests { let mut schema_builder = Schema::builder(); let fast_field_unsigned = schema_builder.add_u64_field("unsigned", FAST); let fast_field_signed = schema_builder.add_i64_field("signed", FAST); + let fast_field_float = schema_builder.add_f64_field("float", FAST); let text_field = schema_builder.add_text_field("text", TEXT); let stored_int_field = schema_builder.add_u64_field("text", STORED); let schema = schema_builder.build(); @@ -824,7 +849,7 @@ mod tests { let index = Index::create_in_ram(schema); let mut index_writer = index.writer_with_num_threads(1, 50_000_000).unwrap(); { - let document = 
doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64); + let document = doc!(fast_field_unsigned => 4u64, fast_field_signed=>4i64, fast_field_float=>4f64); index_writer.add_document(document); index_writer.commit().unwrap(); } @@ -844,10 +869,14 @@ mod tests { assert!(fast_field_reader_opt.is_none()); } { - let fast_field_reader_opt = segment_reader.fast_fields().i64(fast_field_signed); + let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_float); + assert!(fast_field_reader_opt.is_none()); + } + { + let fast_field_reader_opt = segment_reader.fast_fields().u64(fast_field_unsigned); assert!(fast_field_reader_opt.is_some()); let fast_field_reader = fast_field_reader_opt.unwrap(); - assert_eq!(fast_field_reader.get(0), 4i64) + assert_eq!(fast_field_reader.get(0), 4u64) } { @@ -856,5 +885,12 @@ mod tests { let fast_field_reader = fast_field_reader_opt.unwrap(); assert_eq!(fast_field_reader.get(0), 4i64) } + + { + let fast_field_reader_opt = segment_reader.fast_fields().f64(fast_field_float); + assert!(fast_field_reader_opt.is_some()); + let fast_field_reader = fast_field_reader_opt.unwrap(); + assert_eq!(fast_field_reader.get(0), 4f64) + } } } diff --git a/src/postings/postings_writer.rs b/src/postings/postings_writer.rs index bff89e4..9b6e007 100644 --- a/src/postings/postings_writer.rs +++ b/src/postings/postings_writer.rs @@ -35,6 +35,7 @@ fn posting_from_field_entry(field_entry: &FieldEntry) -> Box<dyn PostingsWriter> .unwrap_or_else(|| SpecializedPostingsWriter::<NothingRecorder>::new_boxed()), FieldType::U64(_) | FieldType::I64(_) + | FieldType::F64(_) | FieldType::Date(_) | FieldType::HierarchicalFacet => SpecializedPostingsWriter::<NothingRecorder>::new_boxed(), FieldType::Bytes => { @@ -154,7 +155,7 @@ impl MultiFieldPostingsWriter { .collect(); unordered_term_mappings.insert(field, mapping); } - FieldType::U64(_) | FieldType::I64(_) | FieldType::Date(_) => {} + FieldType::U64(_) | FieldType::I64(_) | FieldType::F64(_) | 
FieldType::Date(_) => {} FieldType::Bytes => {} } diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 6a968c6..a3df714 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -20,7 +20,7 @@ parser! { parser! { fn word[I]()(I) -> String where [I: Stream<Item = char>] { - many1(satisfy(char::is_alphanumeric)) + many1(satisfy(|c: char| c.is_alphanumeric() || c=='.')) .and_then(|s: String| { match s.as_str() { "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")), @@ -266,6 +266,7 @@ mod test { test_parse_query_to_ast_helper("(+a)", "+(\"a\")"); test_parse_query_to_ast_helper("(+a +b)", "(+(\"a\") +(\"b\"))"); test_parse_query_to_ast_helper("abc:toto", "abc:\"toto\""); + test_parse_query_to_ast_helper("abc:1.1", "abc:\"1.1\""); test_parse_query_to_ast_helper("+abc:toto", "+(abc:\"toto\")"); test_parse_query_to_ast_helper("(+abc:toto -titi)", "(+(abc:\"toto\") -(\"titi\"))"); test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")"); @@ -277,6 +278,7 @@ mod test { test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}"); + test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}"); test_is_parse_err("abc + "); } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index bef73ca..ffa7b20 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -18,7 +18,7 @@ use crate::schema::{FieldType, Term}; use crate::tokenizer::TokenizerManager; use combine::Parser; use std::borrow::Cow; -use std::num::ParseIntError; +use std::num::{ParseIntError, ParseFloatError}; use std::ops::Bound; use std::str::FromStr; @@ -30,9 +30,12 @@ pub enum QueryParserError { /// `FieldDoesNotExist(field_name: String)` /// The 
query references a field that is not in the schema FieldDoesNotExist(String), - /// The query contains a term for a `u64`-field, but the value - /// is not a u64. + /// The query contains a term for a `u64` or `i64`-field, but the value + /// is neither. ExpectedInt(ParseIntError), + /// The query contains a term for a `f64`-field, but the value + /// is not a f64. + ExpectedFloat(ParseFloatError), /// It is forbidden queries that are only "excluding". (e.g. -title:pop) AllButQueryForbidden, /// If no default field is declared, running a query without any @@ -60,6 +63,12 @@ impl From<ParseIntError> for QueryParserError { } } +impl From<ParseFloatError> for QueryParserError { + fn from(err: ParseFloatError) -> QueryParserError { + QueryParserError::ExpectedFloat(err) + } +} + impl From<chrono::ParseError> for QueryParserError { fn from(err: chrono::ParseError) -> QueryParserError { QueryParserError::DateFormatError(err) @@ -239,6 +248,11 @@ impl QueryParser { let term = Term::from_field_i64(field, val); Ok(vec![(0, term)]) } + FieldType::F64(_) => { + let val: f64 = f64::from_str(phrase)?; + let term = Term::from_field_f64(field, val); + Ok(vec![(0, term)]) + } FieldType::Date(_) => match chrono::DateTime::parse_from_rfc3339(phrase) { Ok(x) => Ok(vec![( 0, @@ -529,6 +543,7 @@ mod test { schema_builder.add_text_field("nottokenized", STRING); schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_date_field("date", INDEXED); + schema_builder.add_f64_field("float", INDEXED); let schema = schema_builder.build(); let default_fields = vec![title, text]; let tokenizer_manager = TokenizerManager::default(); @@ -634,6 +649,13 @@ mod test { assert!(query_parser .parse_query("unsigned:\"18446744073709551615\"") .is_ok()); + assert!(query_parser.parse_query("float:\"3.1\"").is_ok()); + assert!(query_parser.parse_query("float:\"-2.4\"").is_ok()); + assert!(query_parser.parse_query("float:\"2.1.2\"").is_err()); + 
assert!(query_parser.parse_query("float:\"2.1a\"").is_err()); + assert!(query_parser + .parse_query("float:\"18446744073709551615.0\"") + .is_ok()); test_parse_query_to_logical_ast_helper( "unsigned:2324", "Term([0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 9, 20])", @@ -645,6 +667,12 @@ mod test { &format!("{:?}", Term::from_field_i64(Field(2u32), -2324)), false, ); + + test_parse_query_to_logical_ast_helper( + "float:2.5", + &format!("{:?}", Term::from_field_f64(Field(10u32), 2.5)), + false, + ); } #[test] @@ -786,6 +814,11 @@ mod test { query_parser.parse_query("signed:18b"), Err(QueryParserError::ExpectedInt(_)) ); + assert!(query_parser.parse_query("float:\"1.8\"").is_ok()); + assert_matches!( + query_parser.parse_query("float:1.8a"), + Err(QueryParserError::ExpectedFloat(_)) + ); } #[test] diff --git a/src/query/range_query.rs b/src/query/range_query.rs index 1ec9d20..76a0c15 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -142,6 +142,39 @@ impl RangeQuery { } } + /// Creates a new `RangeQuery` over a `f64` field. + /// + /// If the field is not of the type `f64`, tantivy + /// will panic when the `Weight` object is created. + pub fn new_f64(field: Field, range: Range<f64>) -> RangeQuery { + RangeQuery::new_f64_bounds( + field, + Bound::Included(range.start), + Bound::Excluded(range.end), + ) + } + + /// Create a new `RangeQuery` over a `f64` field. + /// + /// The two `Bound` arguments make it possible to create more complex + /// ranges than semi-inclusive range. + /// + /// If the field is not of the type `f64`, tantivy + /// will panic when the `Weight` object is created. 
+ pub fn new_f64_bounds( + field: Field, + left_bound: Bound<f64>, + right_bound: Bound<f64>, + ) -> RangeQuery { + let make_term_val = |val: &f64| Term::from_field_f64(field, *val).value_bytes().to_owned(); + RangeQuery { + field, + value_type: Type::F64, + left_bound: map_bound(&left_bound, &make_term_val), + right_bound: map_bound(&right_bound, &make_term_val), + } + } + /// Create a new `RangeQuery` over a `u64` field. /// /// The two `Bound` arguments make it possible to create more complex @@ -397,4 +430,61 @@ mod tests { ); } + #[test] + fn test_range_float() { + let float_field: Field; + let schema = { + let mut schema_builder = Schema::builder(); + float_field = schema_builder.add_f64_field("floatfield", INDEXED); + schema_builder.build() + }; + + let index = Index::create_in_ram(schema); + { + let mut index_writer = index.writer_with_num_threads(2, 6_000_000).unwrap(); + + for i in 1..100 { |