diff options
author | petr-tik <petr-tik@users.noreply.github.com> | 2019-08-12 00:24:47 +0100 |
---|---|---|
committer | Paul Masurel <paul.masurel@gmail.com> | 2019-08-12 08:24:47 +0900 |
commit | 028b0a749c263fae4db8705ec2b14386a52b36b5 (patch) | |
tree | 21a2ee955306363106b121112e6dd28a0a9c4cc3 | |
parent | 941f06eb9fda6ad0f6cfcc77043a09f66e78d465 (diff) |
Elastic unbounded range query (#624)
* Tidy up
fmt
remove unnecessary -> Result<()> followed by run().unwrap() in a test
* Adding support for elasticsearch-style unbounded queries
Extend the UserInputBound to include Unbounded, so we can reuse formatting and
internal query format
* Still working on elastic-style range queries
Fixes #498
Merge the elastic_range into range
Reformat to make code easier to follow, use optional() macro to return Some
* Fixed bugs
Made the range parser insensitive to whitespace between the ":" and the range.
Removed optional parsing of field.
Added a unit test for the range parser.
Derived PartialEq to compare the results of parsing as structs, instead of
strings. Found a bug with that unit test - "*}" was parsed as a
UserInputBound::Exclusive, instead of UserInputBound::Unbounded. Added an early
detection-and-return for * in the original range parser
* Correct failing test
Assume that we will use "{*" for Unbounded ranges
* Add a note in the changelog
cargo-fmt
* Moved parenthesis to a newline to make nested if-else more visible
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | src/query/query_parser/query_grammar.rs | 114 | ||||
-rw-r--r-- | src/query/query_parser/query_parser.rs | 1 | ||||
-rw-r--r-- | src/query/query_parser/user_input_ast.rs | 7 | ||||
-rw-r--r-- | src/query/range_query.rs | 44 |
5 files changed, 128 insertions, 39 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index cf482e3..063d47b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Tantivy 0.11.0 - Various bugfixes in the query parser. - Better handling of hyphens in query parser. (#609) - Better handling of whitespaces. +- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types eg. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik) Tantivy 0.10.1 diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index 2ec2bf7..4f794a8 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -83,28 +83,71 @@ parser! { } parser! { + /// Function that parses a range out of a Stream + /// Supports ranges like: + /// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10 + /// [a TO *], [a TO c], [abc TO bcd} fn range[I]()(I) -> UserInputLeaf where [I: Stream<Item = char>] { let range_term_val = || { word().or(negative_number()).or(char('*').with(value("*".to_string()))) }; + + // check for unbounded range in the form of <5, <=10, >5, >=5 + let elastic_unbounded_range = (choice([attempt(string(">=")), + attempt(string("<=")), + attempt(string("<")), + attempt(string(">"))]) + .skip(spaces()), + range_term_val()). 
+ map(|(comparison_sign, bound): (&str, String)| + match comparison_sign { + ">=" => return (UserInputBound::Inclusive(bound), UserInputBound::Unbounded), + "<=" => return (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)), + "<" => return (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)), + ">" => return (UserInputBound::Exclusive(bound), UserInputBound::Unbounded), + // default case + _ => return (UserInputBound::Unbounded, UserInputBound::Unbounded) + }); let lower_bound = (one_of("{[".chars()), range_term_val()) .map(|(boundary_char, lower_bound): (char, String)| - if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) } - else { UserInputBound::Inclusive(lower_bound) }); + if lower_bound == "*" { + UserInputBound::Unbounded + } else { + if boundary_char == '{' { + UserInputBound::Exclusive(lower_bound) + } else { + UserInputBound::Inclusive(lower_bound) + } + }); let upper_bound = (range_term_val(), one_of("}]".chars())) .map(|(higher_bound, boundary_char): (String, char)| - if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) } - else { UserInputBound::Inclusive(higher_bound) }); - ( - optional(field()), - lower_bound - .skip((spaces(), string("TO"), spaces())), - upper_bound, - ).map(|(field, lower, upper)| UserInputLeaf::Range { - field, - lower, - upper + if higher_bound == "*" { + UserInputBound::Unbounded + } else { + if boundary_char == '}' { + UserInputBound::Exclusive(higher_bound) + } else { + UserInputBound::Inclusive(higher_bound) + } + }); + // return only lower and upper + let lower_to_upper = (lower_bound. 
+ skip((spaces(), + string("TO"), + spaces())), + upper_bound); + + (optional(field()).skip(spaces()), + // try elastic first, if it matches, the range is unbounded + attempt(elastic_unbounded_range).or(lower_to_upper)) + .map(|(field, (lower, upper))| + // Construct the leaf from extracted field (optional) + // and bounds + UserInputLeaf::Range { + field, + lower, + upper }) } } @@ -259,6 +302,49 @@ mod test { } #[test] + fn test_parse_elastic_query_ranges() { + test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}"); + test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}"); + test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}"); + test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]"); + test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]"); + + test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}"); + test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}"); + test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}"); + test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]"); + test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}"); + + test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]"); + + test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]"); + } + + #[test] + fn test_range_parser() { + // testing the range() parser separately + let res = range().parse("title: <hello").unwrap().0; + let expected = UserInputLeaf::Range { + field: Some("title".to_string()), + lower: UserInputBound::Unbounded, + upper: UserInputBound::Exclusive("hello".to_string()), + }; + let res2 = range().parse("title:{* TO hello}").unwrap().0; + assert_eq!(res, expected); + assert_eq!(res2, expected); + let expected_weight = UserInputLeaf::Range { + field: Some("weight".to_string()), + lower: UserInputBound::Inclusive("71.2".to_string()), + upper: 
UserInputBound::Unbounded, + }; + + let res3 = range().parse("weight: >=71.2").unwrap().0; + let res4 = range().parse("weight:[71.2 TO *}").unwrap().0; + assert_eq!(res3, expected_weight); + assert_eq!(res4, expected_weight); + } + + #[test] fn test_parse_query_to_triming_spaces() { test_parse_query_to_ast_helper(" abc", "\"abc\""); test_parse_query_to_ast_helper("abc ", "\"abc\""); @@ -291,7 +377,7 @@ mod test { test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]"); test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}"); test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}"); - test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}"); + test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}"); test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}"); test_is_parse_err("abc + "); diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index e5b6f5e..0e697f0 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -369,6 +369,7 @@ impl QueryParser { match *bound { UserInputBound::Inclusive(_) => Ok(Bound::Included(term)), UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)), + UserInputBound::Unbounded => Ok(Bound::Unbounded), } } diff --git a/src/query/query_parser/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs index dc907ed..6965243 100644 --- a/src/query/query_parser/user_input_ast.rs +++ b/src/query/query_parser/user_input_ast.rs @@ -3,6 +3,7 @@ use std::fmt::{Debug, Formatter}; use crate::query::Occur; +#[derive(PartialEq)] pub enum UserInputLeaf { Literal(UserInputLiteral), All, @@ -35,6 +36,7 @@ impl Debug for UserInputLeaf { } } +#[derive(PartialEq)] pub struct UserInputLiteral { pub field_name: Option<String>, pub phrase: String, @@ -49,9 +51,11 @@ impl fmt::Debug for UserInputLiteral { 
} } +#[derive(PartialEq)] pub enum UserInputBound { Inclusive(String), Exclusive(String), + Unbounded, } impl UserInputBound { @@ -59,6 +63,7 @@ impl UserInputBound { match *self { UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word), UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word), + UserInputBound::Unbounded => write!(formatter, "{{\"*\""), } } @@ -66,6 +71,7 @@ impl UserInputBound { match *self { UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word), UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word), + UserInputBound::Unbounded => write!(formatter, "\"*\"}}"), } } @@ -73,6 +79,7 @@ impl UserInputBound { match *self { UserInputBound::Inclusive(ref contents) => contents, UserInputBound::Exclusive(ref contents) => contents, + UserInputBound::Unbounded => &"*", } } } diff --git a/src/query/range_query.rs b/src/query/range_query.rs index daaa9f2..e9f034e 100644 --- a/src/query/range_query.rs +++ b/src/query/range_query.rs @@ -338,39 +338,33 @@ mod tests { use crate::collector::Count; use crate::schema::{Document, Field, Schema, INDEXED}; use crate::Index; - use crate::Result; use std::collections::Bound; #[test] fn test_range_query_simple() { - fn run() -> Result<()> { - let mut schema_builder = Schema::builder(); - let year_field = schema_builder.add_u64_field("year", INDEXED); - let schema = schema_builder.build(); - - let index = Index::create_in_ram(schema); - { - let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap(); - for year in 1950u64..2017u64 { - let num_docs_within_year = 10 + (year - 1950) * (year - 1950); - for _ in 0..num_docs_within_year { - index_writer.add_document(doc!(year_field => year)); - } + let mut schema_builder = Schema::builder(); + let year_field = schema_builder.add_u64_field("year", INDEXED); + let schema = schema_builder.build(); + + let index = Index::create_in_ram(schema); + { + let mut index_writer = 
index.writer_with_num_threads(1, 6_000_000).unwrap(); + for year in 1950u64..2017u64 { + let num_docs_within_year = 10 + (year - 1950) * (year - 1950); + for _ in 0..num_docs_within_year { + index_writer.add_document(doc!(year_field => year)); } - index_writer.commit().unwrap(); } - let reader = index.reader().unwrap(); - let searcher = reader.searcher(); - - let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64); - - // ... or `1960..=1969` if inclusive range is enabled. - let count = searcher.search(&docs_in_the_sixties, &Count)?; - assert_eq!(count, 2285); - Ok(()) + index_writer.commit().unwrap(); } + let reader = index.reader().unwrap(); + let searcher = reader.searcher(); + + let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64); - run().unwrap(); + // ... or `1960..=1969` if inclusive range is enabled. + let count = searcher.search(&docs_in_the_sixties, &Count).unwrap(); + assert_eq!(count, 2285); } #[test] |