summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul Masurel <paul.masurel@gmail.com>2019-08-06 20:33:30 +0900
committerGitHub <noreply@github.com>2019-08-06 20:33:30 +0900
commit143f78eced5e545231087ba5fb233167de00819e (patch)
tree837cd61725f0809eeb3869b4f348bdc5f565042c
parent754b55eee5393557199464eec0b528e57f35cbac (diff)
Trying to fix #609 (#616)
-rw-r--r--CHANGELOG.md1
-rw-r--r--Cargo.toml5
-rw-r--r--src/query/query_parser/query_grammar.rs39
-rw-r--r--src/query/query_parser/query_parser.rs21
4 files changed, 49 insertions, 17 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2cf860c..3784120 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@ Tantivy 0.11.0
=====================
- Added f64 field. Internally reuse u64 code the same way i64 does (@fdb-hiroshima)
+- Closes #609. Better handling of hyphens in query parser.
Tantivy 0.10.1
=====================
diff --git a/Cargo.toml b/Cargo.toml
index 712ca25..cf01250 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "tantivy"
-version = "0.10.1"
+version = "0.11.0"
authors = ["Paul Masurel <paul.masurel@gmail.com>"]
license = "MIT"
categories = ["database-implementations", "data-structures"]
@@ -86,7 +86,6 @@ travis-ci = { repository = "tantivy-search/tantivy" }
[dev-dependencies.fail]
features = ["failpoints"]
-
# Following the "fail" crate best practises, we isolate
# tests that define specific behavior in fail check points
# in a different binary.
@@ -97,4 +96,4 @@ features = ["failpoints"]
[[test]]
name = "failpoints"
path = "tests/failpoints/mod.rs"
-required-features = ["fail/failpoints"] \ No newline at end of file
+required-features = ["fail/failpoints"]
diff --git a/src/query/query_parser/query_grammar.rs b/src/query/query_parser/query_grammar.rs
index a3df714..9d3348a 100644
--- a/src/query/query_parser/query_grammar.rs
+++ b/src/query/query_parser/query_grammar.rs
@@ -20,15 +20,19 @@ parser! {
parser! {
fn word[I]()(I) -> String
where [I: Stream<Item = char>] {
- many1(satisfy(|c: char| c.is_alphanumeric() || c=='.'))
- .and_then(|s: String| {
- match s.as_str() {
- "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
- "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
- "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
- _ => Ok(s)
- }
- })
+ (
+ satisfy(|c: char| c.is_alphanumeric()),
+ many(satisfy(|c: char| !c.is_whitespace() && ![':', '{', '}', '"', '[', ']', '(',')'].contains(&c)))
+ )
+ .map(|(s1, s2): (char, String)| format!("{}{}", s1, s2))
+ .and_then(|s: String| {
+ match s.as_str() {
+ "OR" => Err(StreamErrorFor::<I>::unexpected_static_message("OR")),
+ "AND" => Err(StreamErrorFor::<I>::unexpected_static_message("AND")),
+ "NOT" => Err(StreamErrorFor::<I>::unexpected_static_message("NOT")),
+ _ => Ok(s)
+ }
+ })
}
}
@@ -115,9 +119,7 @@ parser! {
)
)
.or(attempt(
- range().map(UserInputAST::from)
- )
- )
+ range().map(UserInputAST::from)))
.or(literal().map(|leaf| UserInputAST::Leaf(Box::new(leaf))))
}
}
@@ -226,6 +228,13 @@ mod test {
}
#[test]
+ fn test_parse_query_to_ast_hyphen() {
+ test_parse_query_to_ast_helper("\"www-form-encoded\"", "\"www-form-encoded\"");
+ test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+ test_parse_query_to_ast_helper("www-form-encoded", "\"www-form-encoded\"");
+ }
+
+ #[test]
fn test_parse_query_to_ast_not_op() {
assert_eq!(
format!("{:?}", parse_to_ast().parse("NOT")),
@@ -272,6 +281,11 @@ mod test {
test_parse_query_to_ast_helper("-abc:toto", "-(abc:\"toto\")");
test_parse_query_to_ast_helper("abc:a b", "(abc:\"a\" \"b\")");
test_parse_query_to_ast_helper("abc:\"a b\"", "abc:\"a b\"");
+ test_is_parse_err("abc + ");
+ }
+
+ #[test]
+ fn test_parse_query_to_ast_range() {
test_parse_query_to_ast_helper("foo:[1 TO 5]", "foo:[\"1\" TO \"5\"]");
test_parse_query_to_ast_helper("[1 TO 5]", "[\"1\" TO \"5\"]");
test_parse_query_to_ast_helper("foo:{a TO z}", "foo:{\"a\" TO \"z\"}");
@@ -279,6 +293,5 @@ mod test {
test_parse_query_to_ast_helper("foo:[* TO toto}", "foo:[\"*\" TO \"toto\"}");
test_parse_query_to_ast_helper("foo:[1 TO *}", "foo:[\"1\" TO \"*\"}");
test_parse_query_to_ast_helper("foo:[1.1 TO *}", "foo:[\"1.1\" TO \"*\"}");
- test_is_parse_err("abc + ");
}
}
diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs
index 715040f..d32546b 100644
--- a/src/query/query_parser/query_parser.rs
+++ b/src/query/query_parser/query_parser.rs
@@ -690,7 +690,7 @@ mod test {
}
#[test]
- pub fn test_parse_query_to_ast_disjunction() {
+ pub fn test_parse_query_to_ast_single_term() {
test_parse_query_to_logical_ast_helper(
"title:toto",
"Term([0, 0, 0, 0, 116, 111, 116, 111])",
@@ -714,6 +714,10 @@ mod test {
.unwrap(),
QueryParserError::AllButQueryForbidden
);
+ }
+
+ #[test]
+ pub fn test_parse_query_to_ast_two_terms() {
test_parse_query_to_logical_ast_helper(
"title:a b",
"(Term([0, 0, 0, 0, 97]) (Term([0, 0, 0, 0, 98]) \
@@ -726,6 +730,10 @@ mod test {
(1, Term([0, 0, 0, 0, 98]))]\"",
false,
);
+ }
+
+ #[test]
+ pub fn test_parse_query_to_ast_ranges() {
test_parse_query_to_logical_ast_helper(
"title:[a TO b]",
"(Included(Term([0, 0, 0, 0, 97])) TO \
@@ -893,4 +901,15 @@ mod test {
true,
);
}
+
+ #[test]
+ pub fn test_query_parser_hyphen() {
+ test_parse_query_to_logical_ast_helper(
+ "title:www-form-encoded",
+ "\"[(0, Term([0, 0, 0, 0, 119, 119, 119])), \
+ (1, Term([0, 0, 0, 0, 102, 111, 114, 109])), \
+ (2, Term([0, 0, 0, 0, 101, 110, 99, 111, 100, 101, 100]))]\"",
+ false,
+ );
+ }
}