diff options
author | Paul Masurel <paul.masurel@gmail.com> | 2019-08-06 20:33:30 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-08-06 20:33:30 +0900 |
commit | 143f78eced5e545231087ba5fb233167de00819e (patch) | |
tree | 837cd61725f0809eeb3869b4f348bdc5f565042c | |
parent | 754b55eee5393557199464eec0b528e57f35cbac (diff) |
Trying to fix #609 (#616)
-rw-r--r-- | CHANGELOG.md | 1 | ||||
-rw-r--r-- | Cargo.toml | 5 | ||||
-rw-r--r-- | src/query/query_parser/query_grammar.rs | 39 | ||||
-rw-r--r-- | src/query/query_parser/query_parser.rs | 21 |
4 files changed, 49 insertions, 17 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md index 2cf860c..3784120 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ Tantivy 0.11.0 ===================== - Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima) +- Closes #609. Better handling of hyphens in query parser. Tantivy 0.10.1 ===================== @@ -1,6 +1,6 @@ [package] name = "tantivy" -version = "0.10.1" +version = "0.11.0" authors = ["Paul Masurel <paul.masurel@gmail.com>"] license = "MIT" categories = ["database-implementations", "data-structures"] @@ -86,7 +86,6 @@ travis-ci = { repository = "tantivy-search/tantivy" } [dev-dependencies.fail] features = ["failpoints"] - # Following the "fail" crate best practises, we isolate # tests that define specific behavior in fail check points # in a different binary. @@ -97,4 +96,4 @@ features = ["failpoints"] [[test]] name = "failpoints" path = "tests/failpoints/mod.rs" -required-features = ["fail/failpoints"]
\ No newline at end of file +required-features = ["fail/failpoints"] diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs index a3df714..9d3348a 100644 --- a/src/query/query_parser/query_grammar.rs +++ b/src/query/query_parser/query_grammar.rs @@ -20,15 +20,19 @@ parser! { parser! { fn word[I]()(I) -> String where [I: Stream<Item = char>] { - many1(satisfy(|c: char| c.is_alphanumeric() || c=='.')) - .and_then(|s: String| { - match s.as_str() { - "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")), - "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")), - "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")), - _ => Ok(s) - } - }) + ( + satisfy(|c: char| c.is_alphanumeric()), + many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c))) + ) + .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2)) + .and_then(|s: String| { + match s.as_str() { + "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")), + "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")), + "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")), + _ => Ok(s) + } + }) } } @@ -115,9 +119,7 @@ parser! { ) ) .or(attempt( - range().map(UserInputAST::from) - ) - ) + range().map(UserInputAST::from))) .or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf)))) } } @@ -226,6 +228,13 @@ mod test { } #[test] + fn test_parse_query_to_ast_hyphen() { + test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\""); + test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\""); + test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\""); + } + + #[test] fn test_parse_query_to_ast_not_op() { assert_eq!( format!("{:?}", parse_to_ast().parse("NOT")), @@ -272,6 +281,11 @@ mod test { test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")"); test_parse_query_to_ast_helper("abc:a b", "(abc:\"a\" \"b\")"); test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\""); + test_is_parse_err("abc + "); + } + + #[test] + fn test_parse_query_to_ast_range() { test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]"); test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]"); test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}"); @@ -279,6 +293,5 @@ mod test { test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}"); test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}"); test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}"); - test_is_parse_err("abc + "); } } diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 715040f..d32546b 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -690,7 +690,7 @@ mod test { } #[test] - pub fn test_parse_query_to_ast_disjunction() { + pub fn test_parse_query_to_ast_single_term() { test_parse_query_to_logical_ast_helper( "title:toto", "Term([0, 0, 0, 0, 116, 111, 116, 111])", @@ -714,6 +714,10 @@ mod test { .unwrap(), QueryParserError::AllButQueryForbidden ); + } + + #[test] + pub fn test_parse_query_to_ast_two_terms() { test_parse_query_to_logical_ast_helper( "title:a b", "(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \ @@ -726,6 +730,10 @@ mod test { (1, Term([0, 0, 0, 0, 98]))]\"", false, ); + } + + #[test] + pub fn test_parse_query_to_ast_ranges() { test_parse_query_to_logical_ast_helper( "title:[a TO b]", "(Included(Term([0, 0, 0, 0, 97])) TO \ @@ -893,4 +901,15 @@ mod test { true, ); } + + #[test] + pub fn test_query_parser_hyphen() { + test_parse_query_to_logical_ast_helper( + "title:www-form-encoded", + "\"[(0, Term([0, 0, 0, 0, 119, 119, 119])), \ + (1, Term([0, 0, 0, 0, 102, 111, 114, 109])), \ + (2, Term([0, 0, 0, 0, 101, 110, 99, 111, 100, 101, 100]))]\"", + false, + ); + } } |