Disable Unicode mode for literal regex.

When ripgrep detects literals, it passes them to Regex::new as raw hex-escaped byte sequences. This permits literal optimizations for arbitrary byte sequences (i.e., possibly invalid UTF-8). The problem is that Regex::new interprets hex-escaped byte sequences as *Unicode codepoints* by default, but we want them to stand for their raw byte values. Therefore, disable Unicode mode. This is OK, since the regex is composed entirely of literals and literal extraction does Unicode case folding. Fixes #251.
author: Andrew Gallant <jamslam@gmail.com> 2016-11-28 18:31:58 -0500
committer: Andrew Gallant <jamslam@gmail.com> 2016-11-28 18:31:58 -0500
commit: 0473df1ef5721143941fb7f883e22b17292b35bb (patch)
tree: 5a3ad00a963f619e090051bdd29fdf46a12bc523 /tests
parent: 301a3fd71d3a923419d6d3e7604979b314121801 (diff)
1 files changed, 9 insertions, 0 deletions
diff --git a/tests/tests.rs b/tests/tests.rs
index 5c152b99..876ee407 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -936,6 +936,15 @@ clean!(regression_229, "[E]conomie", ".", |wd: WorkDir, mut cmd: Command| {
     wd.assert_err(&mut cmd);
 });
 
+// See: https://github.com/BurntSushi/ripgrep/issues/251
+clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create("foo", "привет\nПривет\nПрИвЕт");
+    cmd.arg("-i");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n");
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/7
 sherlock!(feature_7, "-fpat", "sherlock", |wd: WorkDir, mut cmd: Command| {
     wd.create("pat", "Sherlock\nHolmes");
author	Andrew Gallant <jamslam@gmail.com>	2016-11-28 18:31:58 -0500
committer	Andrew Gallant <jamslam@gmail.com>	2016-11-28 18:31:58 -0500
commit	0473df1ef5721143941fb7f883e22b17292b35bb (patch)
tree	5a3ad00a963f619e090051bdd29fdf46a12bc523 /tests
parent	301a3fd71d3a923419d6d3e7604979b314121801 (diff)