Elastic unbounded range query (#624)

* Tidy up fmt, remove unnecessary -> Result<()> followed by run().unwrap() in a test * Adding support for elasticsearch-style unbounded queries. Extend the UserInputBound to include Unbounded, so we can reuse formatting and internal query format * Still working on elastic-style range queries. Fixes #498. Merge the elastic_range into range. Reformat to make code easier to follow, use optional() macro to return Some * Fixed bugs. Made the range parser insensitive to whitespace between the ":" and the range. Removed optional parsing of field. Added a unit test for the range parser. Derived PartialEq to compare the results of parsing as structs, instead of strings. Found a bug with that unit test - "*}" was parsed as a UserInputBound::Exclusive, instead of UserInputBound::Unbounded. Added an early detection-and-return for * in the original range parser * Correct failing test. Assume that we will use "{*" for Unbounded ranges * Add a note in the changelog, cargo-fmt * Moved parenthesis to a newline to make nested if-else more visible
author: petr-tik <petr-tik@users.noreply.github.com> 2019-08-12 00:24:47 +0100
committer: Paul Masurel <paul.masurel@gmail.com> 2019-08-12 08:24:47 +0900
commit: 028b0a749c263fae4db8705ec2b14386a52b36b5 (patch)
tree: 21a2ee955306363106b121112e6dd28a0a9c4cc3
parent: 941f06eb9fda6ad0f6cfcc77043a09f66e78d465 (diff)
5 files changed, 128 insertions, 39 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf482e3..063d47b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Tantivy 0.11.0
 - Various bugfixes in the query parser.
     - Better handling of hyphens in query parser. (#609)
     - Better handling of whitespaces.
+- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types, e.g. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
 
 
 Tantivy 0.10.1
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index 2ec2bf7..4f794a8 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -83,28 +83,71 @@ parser! {
 }
 
 parser! {
+    /// Function that parses a range out of a Stream
+    /// Supports ranges like:
+    /// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
+    /// [a TO *], [a TO c], [abc TO bcd}
     fn range[I]()(I) -> UserInputLeaf
     where [I: Stream<Item = char>] {
         let range_term_val = || {
             word().or(negative_number()).or(char('*').with(value("*".to_string())))
         };
+
+        // check for unbounded range in the form of <5, <=10, >5, >=5
+        let elastic_unbounded_range = (choice([attempt(string(">=")),
+                                               attempt(string("<=")),
+                                               attempt(string("<")),
+                                               attempt(string(">"))])
+                                       .skip(spaces()),
+                                       range_term_val()).
+            map(|(comparison_sign, bound): (&str, String)|
+                match comparison_sign {
+                    ">=" => return (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
+                    "<=" => return (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
+                    "<" => return (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
+                    ">" => return (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
+                    // default case
+                    _ => return (UserInputBound::Unbounded, UserInputBound::Unbounded)
+                });
         let lower_bound = (one_of("{[".chars()), range_term_val())
             .map(|(boundary_char, lower_bound): (char, String)|
-                if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
-                else { UserInputBound::Inclusive(lower_bound) });
+                 if lower_bound == "*" {
+                     UserInputBound::Unbounded
+                 } else {
+                     if boundary_char == '{' {
+                         UserInputBound::Exclusive(lower_bound)
+                     } else {
+                         UserInputBound::Inclusive(lower_bound)
+                     }
+                 });
         let upper_bound = (range_term_val(), one_of("}]".chars()))
             .map(|(higher_bound, boundary_char): (String, char)|
-                if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
-                else { UserInputBound::Inclusive(higher_bound) });
-        (
-            optional(field()),
-            lower_bound
-            .skip((spaces(), string("TO"), spaces())),
-            upper_bound,
-        ).map(|(field, lower, upper)| UserInputLeaf::Range {
-                field,
-                lower,
-                upper
+                 if higher_bound == "*" {
+                     UserInputBound::Unbounded
+                 } else {
+                     if boundary_char == '}' {
+                         UserInputBound::Exclusive(higher_bound)
+                     } else {
+                         UserInputBound::Inclusive(higher_bound)
+                     }
+                 });
+         // return only lower and upper
+        let lower_to_upper = (lower_bound.
+                                    skip((spaces(),
+                                          string("TO"),
+                                          spaces())),
+                                    upper_bound);
+
+        (optional(field()).skip(spaces()),
+         // try elastic first, if it matches, the range is unbounded
+         attempt(elastic_unbounded_range).or(lower_to_upper))
+            .map(|(field, (lower, upper))|
+                 // Construct the leaf from extracted field (optional)
+                 // and bounds
+                 UserInputLeaf::Range {
+                     field,
+                     lower,
+                     upper
         })
     }
 }
@@ -259,6 +302,49 @@ mod test {
     }
 
     #[test]
+    fn test_parse_elastic_query_ranges() {
+        test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}");
+        test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}");
+        test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]");
+        test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]");
+
+        test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
+        test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
+        test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
+        test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}");
+
+        test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]");
+
+        test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]");
+    }
+
+    #[test]
+    fn test_range_parser() {
+        // testing the range() parser separately
+        let res = range().parse("title: <hello").unwrap().0;
+        let expected = UserInputLeaf::Range {
+            field: Some("title".to_string()),
+            lower: UserInputBound::Unbounded,
+            upper: UserInputBound::Exclusive("hello".to_string()),
+        };
+        let res2 = range().parse("title:{* TO hello}").unwrap().0;
+        assert_eq!(res, expected);
+        assert_eq!(res2, expected);
+        let expected_weight = UserInputLeaf::Range {
+            field: Some("weight".to_string()),
+            lower: UserInputBound::Inclusive("71.2".to_string()),
+            upper: UserInputBound::Unbounded,
+        };
+
+        let res3 = range().parse("weight: >=71.2").unwrap().0;
+        let res4 = range().parse("weight:[71.2 TO *}").unwrap().0;
+        assert_eq!(res3, expected_weight);
+        assert_eq!(res4, expected_weight);
+    }
+
+    #[test]
     fn test_parse_query_to_triming_spaces() {
         test_parse_query_to_ast_helper("   abc", "\"abc\"");
         test_parse_query_to_ast_helper("abc ", "\"abc\"");
@@ -291,7 +377,7 @@ mod test {
         test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
         test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
         test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
-        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
+        test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
         test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
         test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
         test_is_parse_err("abc +    ");
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index e5b6f5e..0e697f0 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -369,6 +369,7 @@ impl QueryParser {
         match *bound {
             UserInputBound::Inclusive(_) => Ok(Bound::Included(term)),
             UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)),
+            UserInputBound::Unbounded => Ok(Bound::Unbounded),
         }
     }
 
diff --git a/src/query/query_parser/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs
index dc907ed..6965243 100644
--- a/src/query/query_parser/user_input_ast.rs
+++ b/src/query/query_parser/user_input_ast.rs
@@ -3,6 +3,7 @@ use std::fmt::{Debug, Formatter};
 
 use crate::query::Occur;
 
+#[derive(PartialEq)]
 pub enum UserInputLeaf {
     Literal(UserInputLiteral),
     All,
@@ -35,6 +36,7 @@ impl Debug for UserInputLeaf {
     }
 }
 
+#[derive(PartialEq)]
 pub struct UserInputLiteral {
     pub field_name: Option<String>,
     pub phrase: String,
@@ -49,9 +51,11 @@ impl fmt::Debug for UserInputLiteral {
     }
 }
 
+#[derive(PartialEq)]
 pub enum UserInputBound {
     Inclusive(String),
     Exclusive(String),
+    Unbounded,
 }
 
 impl UserInputBound {
@@ -59,6 +63,7 @@ impl UserInputBound {
         match *self {
             UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
             UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
+            UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
         }
     }
 
@@ -66,6 +71,7 @@ impl UserInputBound {
         match *self {
             UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
             UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
+            UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
         }
     }
 
@@ -73,6 +79,7 @@ impl UserInputBound {
         match *self {
             UserInputBound::Inclusive(ref contents) => contents,
             UserInputBound::Exclusive(ref contents) => contents,
+            UserInputBound::Unbounded => &"*",
         }
     }
 }
diff --git a/src/query/range_query.rs b/src/query/range_query.rs
index daaa9f2..e9f034e 100644
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -338,39 +338,33 @@ mod tests {
     use crate::collector::Count;
     use crate::schema::{Document, Field, Schema, INDEXED};
     use crate::Index;
-    use crate::Result;
     use std::collections::Bound;
 
     #[test]
     fn test_range_query_simple() {
-        fn run() -> Result<()> {
-            let mut schema_builder = Schema::builder();
-            let year_field = schema_builder.add_u64_field("year", INDEXED);
-            let schema = schema_builder.build();
-
-            let index = Index::create_in_ram(schema);
-            {
-                let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
-                for year in 1950u64..2017u64 {
-                    let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
-                    for _ in 0..num_docs_within_year {
-                        index_writer.add_document(doc!(year_field => year));
-                    }
+        let mut schema_builder = Schema::builder();
+        let year_field = schema_builder.add_u64_field("year", INDEXED);
+        let schema = schema_builder.build();
+
+        let index = Index::create_in_ram(schema);
+        {
+            let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
+            for year in 1950u64..2017u64 {
+                let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
+                for _ in 0..num_docs_within_year {
+                    index_writer.add_document(doc!(year_field => year));
                 }
-                index_writer.commit().unwrap();
             }
-            let reader = index.reader().unwrap();
-            let searcher = reader.searcher();
-
-            let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
-
-            // ... or `1960..=1969` if inclusive range is enabled.
-            let count = searcher.search(&docs_in_the_sixties, &Count)?;
-            assert_eq!(count, 2285);
-            Ok(())
+            index_writer.commit().unwrap();
         }
+        let reader = index.reader().unwrap();
+        let searcher = reader.searcher();
+
+        let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
 
-        run().unwrap();
+        // ... or `1960..=1969` if inclusive range is enabled.
+        let count = searcher.search(&docs_in_the_sixties, &Count).unwrap();
+        assert_eq!(count, 2285);
     }
 
     #[test]
author	petr-tik <petr-tik@users.noreply.github.com>	2019-08-12 00:24:47 +0100
committer	Paul Masurel <paul.masurel@gmail.com>	2019-08-12 08:24:47 +0900
commit	028b0a749c263fae4db8705ec2b14386a52b36b5 (patch)
tree	21a2ee955306363106b121112e6dd28a0a9c4cc3
parent	941f06eb9fda6ad0f6cfcc77043a09f66e78d465 (diff)