summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpetr-tik <petr-tik@users.noreply.github.com>2019-08-12 00:24:47 +0100
committerPaul Masurel <paul.masurel@gmail.com>2019-08-12 08:24:47 +0900
commit028b0a749c263fae4db8705ec2b14386a52b36b5 (patch)
tree21a2ee955306363106b121112e6dd28a0a9c4cc3
parent941f06eb9fda6ad0f6cfcc77043a09f66e78d465 (diff)
Elastic unbounded range query (#624)
* Tidy up: fmt, remove unnecessary `-> Result<()>` followed by `run().unwrap()` in a test.
* Adding support for elasticsearch-style unbounded queries. Extend the UserInputBound to include Unbounded, so we can reuse formatting and internal query format.
* Still working on elastic-style range queries. Fixes #498. Merge the elastic_range into range. Reformat to make code easier to follow, use optional() macro to return Some.
* Fixed bugs. Made the range parser insensitive to whitespace between the ":" and the range. Removed optional parsing of field. Added a unit test for the range parser. Derived PartialEq to compare the results of parsing as structs, instead of strings. Found a bug with that unit test - "*}" was parsed as a UserInputBound::Exclusive, instead of UserInputBound::Unbounded. Added an early detection-and-return for * in the original range parser.
* Correct failing test. Assume that we will use "{*" for Unbounded ranges.
* Add a note in the changelog. cargo-fmt.
* Moved parenthesis to a newline to make nested if-else more visible.
-rw-r--r--CHANGELOG.md1
-rw-r--r--src/query/query_parser/query_grammar.rs114
-rw-r--r--src/query/query_parser/query_parser.rs1
-rw-r--r--src/query/query_parser/user_input_ast.rs7
-rw-r--r--src/query/range_query.rs44
5 files changed, 128 insertions, 39 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index cf482e3..063d47b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Tantivy 0.11.0
- Various bugfixes in the query parser.
- Better handling of hyphens in query parser. (#609)
- Better handling of whitespaces.
+- Closes #498 - add support for Elastic-style unbounded range queries for alphanumeric types, e.g. "title:>hello", "weight:>=70.5", "height:<200" (@petr-tik)
Tantivy 0.10.1
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index 2ec2bf7..4f794a8 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -83,28 +83,71 @@ parser! {
}
parser! {
+ /// Parses a range leaf (optional field plus a lower and an upper bound) out of a Stream.
+ /// Supports ranges like:
+ /// [5 TO 10], {5 TO 10}, [* TO 10], [10 TO *], {10 TO *], >5, <=10
+ /// [a TO *], [a TO c], [abc TO bcd}
fn range[I]()(I) -> UserInputLeaf
where [I: Stream<Item = char>] {
let range_term_val = || {
word().or(negative_number()).or(char('*').with(value("*".to_string())))
};
+
+ // check for unbounded range in the form of <5, <=10, >5, >=5
+ let elastic_unbounded_range = (choice([attempt(string(">=")), // two-character operators are listed before "<" and ">"
+ attempt(string("<=")),
+ attempt(string("<")),
+ attempt(string(">"))])
+ .skip(spaces()), // whitespace between the operator and the value is accepted, e.g. "<= 70"
+ range_term_val()).
+ map(|(comparison_sign, bound): (&str, String)|
+ match comparison_sign { // result is a (lower, upper) pair; the side without a value is Unbounded
+ ">=" => return (UserInputBound::Inclusive(bound), UserInputBound::Unbounded),
+ "<=" => return (UserInputBound::Unbounded, UserInputBound::Inclusive(bound)),
+ "<" => return (UserInputBound::Unbounded, UserInputBound::Exclusive(bound)),
+ ">" => return (UserInputBound::Exclusive(bound), UserInputBound::Unbounded),
+ // default case: a match on &str needs a wildcard arm to be exhaustive
+ _ => return (UserInputBound::Unbounded, UserInputBound::Unbounded) // NOTE(review): looks unreachable, choice() above only accepts the four operators
+ });
let lower_bound = (one_of("{[".chars()), range_term_val())
.map(|(boundary_char, lower_bound): (char, String)|
- if boundary_char == '{' { UserInputBound::Exclusive(lower_bound) }
- else { UserInputBound::Inclusive(lower_bound) });
+ if lower_bound == "*" { // "*" means no lower limit, whichever bracket precedes it
+ UserInputBound::Unbounded
+ } else {
+ if boundary_char == '{' { // '{' opens an exclusive bound, '[' an inclusive one
+ UserInputBound::Exclusive(lower_bound)
+ } else {
+ UserInputBound::Inclusive(lower_bound)
+ }
+ });
let upper_bound = (range_term_val(), one_of("}]".chars()))
.map(|(higher_bound, boundary_char): (String, char)|
- if boundary_char == '}' { UserInputBound::Exclusive(higher_bound) }
- else { UserInputBound::Inclusive(higher_bound) });
- (
- optional(field()),
- lower_bound
- .skip((spaces(), string("TO"), spaces())),
- upper_bound,
- ).map(|(field, lower, upper)| UserInputLeaf::Range {
- field,
- lower,
- upper
+ if higher_bound == "*" { // "*" means no upper limit, whichever bracket follows it
+ UserInputBound::Unbounded
+ } else {
+ if boundary_char == '}' { // '}' closes an exclusive bound, ']' an inclusive one
+ UserInputBound::Exclusive(higher_bound)
+ } else {
+ UserInputBound::Inclusive(higher_bound)
+ }
+ });
+ // bracketed form "<lower> TO <upper>"; yields only the (lower, upper) pair
+ let lower_to_upper = (lower_bound.
+ skip((spaces(),
+ string("TO"),
+ spaces())),
+ upper_bound);
+
+ (optional(field()).skip(spaces()), // whitespace may follow the field part, e.g. "title: <hello"
+ // try elastic first, if it matches, the range is unbounded on one side
+ attempt(elastic_unbounded_range).or(lower_to_upper)) // otherwise fall back to the bracketed form
+ .map(|(field, (lower, upper))|
+ // Construct the leaf from extracted field (optional)
+ // and bounds
+ UserInputLeaf::Range {
+ field,
+ lower,
+ upper
})
}
}
@@ -259,6 +302,49 @@ mod test {
}
#[test]
+ fn test_parse_elastic_query_ranges() { // NOTE(review): test_parse_query_to_ast_helper is defined outside this hunk; presumably it compares the parsed AST's Debug text with the second argument
+ test_parse_query_to_ast_helper("title: >a", "title:{\"a\" TO \"*\"}"); // ">" gives an exclusive lower bound and an open upper side
+ test_parse_query_to_ast_helper("title:>=a", "title:[\"a\" TO \"*\"}"); // ">=" gives an inclusive lower bound
+ test_parse_query_to_ast_helper("title: <a", "title:{\"*\" TO \"a\"}"); // "<" gives an exclusive upper bound and an open lower side
+ test_parse_query_to_ast_helper("title:<=a", "title:{\"*\" TO \"a\"]"); // "<=" gives an inclusive upper bound
+ test_parse_query_to_ast_helper("title:<=bsd", "title:{\"*\" TO \"bsd\"]"); // multi-character word value
+
+ test_parse_query_to_ast_helper("weight: >70", "weight:{\"70\" TO \"*\"}"); // same four operators with numeric-looking values
+ test_parse_query_to_ast_helper("weight:>=70", "weight:[\"70\" TO \"*\"}");
+ test_parse_query_to_ast_helper("weight: <70", "weight:{\"*\" TO \"70\"}");
+ test_parse_query_to_ast_helper("weight:<=70", "weight:{\"*\" TO \"70\"]");
+ test_parse_query_to_ast_helper("weight: >60.7", "weight:{\"60.7\" TO \"*\"}"); // decimal value
+
+ test_parse_query_to_ast_helper("weight: <= 70", "weight:{\"*\" TO \"70\"]"); // whitespace between the operator and the value
+
+ test_parse_query_to_ast_helper("weight: <= 70.5", "weight:{\"*\" TO \"70.5\"]"); // whitespace plus a decimal value
+ }
+
+ #[test]
+ fn test_range_parser() {
+ // testing the range() parser separately: elastic-style and bracketed spellings must yield equal leaves
+ let res = range().parse("title: <hello").unwrap().0; // .0 is the parsed leaf; the rest of the parse result is ignored
+ let expected = UserInputLeaf::Range {
+ field: Some("title".to_string()),
+ lower: UserInputBound::Unbounded,
+ upper: UserInputBound::Exclusive("hello".to_string()),
+ };
+ let res2 = range().parse("title:{* TO hello}").unwrap().0; // bracketed spelling of the same range
+ assert_eq!(res, expected); // structural comparison of the parsed leaves, not of their formatted strings
+ assert_eq!(res2, expected);
+ let expected_weight = UserInputLeaf::Range {
+ field: Some("weight".to_string()),
+ lower: UserInputBound::Inclusive("71.2".to_string()),
+ upper: UserInputBound::Unbounded,
+ };
+
+ let res3 = range().parse("weight: >=71.2").unwrap().0;
+ let res4 = range().parse("weight:[71.2 TO *}").unwrap().0; // "*}" must parse as Unbounded, not Exclusive("*")
+ assert_eq!(res3, expected_weight);
+ assert_eq!(res4, expected_weight);
+ }
+
+ #[test]
fn test_parse_query_to_triming_spaces() {
test_parse_query_to_ast_helper(" abc", "\"abc\"");
test_parse_query_to_ast_helper("abc ", "\"abc\"");
@@ -291,7 +377,7 @@ mod test {
test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
test_parse_query_to_ast_helper("foo:[1 TO toto}", "foo:[\"1\" TO \"toto\"}");
- test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
+ test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:{\"*\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
test_is_parse_err("abc + ");
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index e5b6f5e..0e697f0 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -369,6 +369,7 @@ impl QueryParser {
match *bound {
UserInputBound::Inclusive(_) => Ok(Bound::Included(term)),
UserInputBound::Exclusive(_) => Ok(Bound::Excluded(term)),
+ UserInputBound::Unbounded => Ok(Bound::Unbounded),
}
}
diff --git a/src/query/query_parser/user_input_ast.rs b/src/query/query_parser/user_input_ast.rs
index dc907ed..6965243 100644
--- a/src/query/query_parser/user_input_ast.rs
+++ b/src/query/query_parser/user_input_ast.rs
@@ -3,6 +3,7 @@ use std::fmt::{Debug, Formatter};
use crate::query::Occur;
+#[derive(PartialEq)]
pub enum UserInputLeaf {
Literal(UserInputLiteral),
All,
@@ -35,6 +36,7 @@ impl Debug for UserInputLeaf {
}
}
+#[derive(PartialEq)]
pub struct UserInputLiteral {
pub field_name: Option<String>,
pub phrase: String,
@@ -49,9 +51,11 @@ impl fmt::Debug for UserInputLiteral {
}
}
+#[derive(PartialEq)] // allows parsed bounds to be compared structurally, e.g. with assert_eq! in tests
pub enum UserInputBound {
Inclusive(String),
Exclusive(String),
+ Unbounded, // no limit on this side of the range; carries no value
}
impl UserInputBound {
@@ -59,6 +63,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "[\"{}\"", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "{{\"{}\"", word),
+ UserInputBound::Unbounded => write!(formatter, "{{\"*\""),
}
}
@@ -66,6 +71,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref word) => write!(formatter, "\"{}\"]", word),
UserInputBound::Exclusive(ref word) => write!(formatter, "\"{}\"}}", word),
+ UserInputBound::Unbounded => write!(formatter, "\"*\"}}"),
}
}
@@ -73,6 +79,7 @@ impl UserInputBound {
match *self {
UserInputBound::Inclusive(ref contents) => contents,
UserInputBound::Exclusive(ref contents) => contents,
+ UserInputBound::Unbounded => &"*",
}
}
}
diff --git a/src/query/range_query.rs b/src/query/range_query.rs
index daaa9f2..e9f034e 100644
--- a/src/query/range_query.rs
+++ b/src/query/range_query.rs
@@ -338,39 +338,33 @@ mod tests {
use crate::collector::Count;
use crate::schema::{Document, Field, Schema, INDEXED};
use crate::Index;
- use crate::Result;
use std::collections::Bound;
#[test]
fn test_range_query_simple() {
- fn run() -> Result<()> {
- let mut schema_builder = Schema::builder();
- let year_field = schema_builder.add_u64_field("year", INDEXED);
- let schema = schema_builder.build();
-
- let index = Index::create_in_ram(schema);
- {
- let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
- for year in 1950u64..2017u64 {
- let num_docs_within_year = 10 + (year - 1950) * (year - 1950);
- for _ in 0..num_docs_within_year {
- index_writer.add_document(doc!(year_field => year));
- }
+ let mut schema_builder = Schema::builder(); // schema with a single indexed u64 field named "year"
+ let year_field = schema_builder.add_u64_field("year", INDEXED);
+ let schema = schema_builder.build();
+
+ let index = Index::create_in_ram(schema);
+ {
+ let mut index_writer = index.writer_with_num_threads(1, 6_000_000).unwrap();
+ for year in 1950u64..2017u64 { // years 1950 through 2016
+ let num_docs_within_year = 10 + (year - 1950) * (year - 1950); // 10 docs for 1950, plus the squared offset from 1950
+ for _ in 0..num_docs_within_year {
+ index_writer.add_document(doc!(year_field => year));
}
- index_writer.commit().unwrap();
}
- let reader = index.reader().unwrap();
- let searcher = reader.searcher();
-
- let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64);
-
- // ... or `1960..=1969` if inclusive range is enabled.
- let count = searcher.search(&docs_in_the_sixties, &Count)?;
- assert_eq!(count, 2285);
- Ok(())
+ index_writer.commit().unwrap();
}
+ let reader = index.reader().unwrap();
+ let searcher = reader.searcher();
+
+ let docs_in_the_sixties = RangeQuery::new_u64(year_field, 1960u64..1970u64); // half-open range: 1960 through 1969
- run().unwrap();
+ // ... or `1960..=1969` if inclusive range is enabled.
+ let count = searcher.search(&docs_in_the_sixties, &Count).unwrap(); // unwrap directly: a search error should fail the test
+ assert_eq!(count, 2285); // 10 years * 10 docs + sum of k^2 for k in 10..=19 (2185)
}
#[test]