Fixes a bug with --smart-case.

This was a subtle bug, but the big picture was that the smart case information wasn't being carried through to the literal extraction in some cases. When this happened, it was possible to get back an incomplete set of literals, which would therefore miss some valid matches. The fix to this is to actually parse the regex and determine whether smart case applies before doing anything else. It's a little extra work, but parsing is pretty fast. Fixes #199
author: Andrew Gallant <jamslam@gmail.com> 2016-11-06 12:07:47 -0500
committer: Andrew Gallant <jamslam@gmail.com> 2016-11-06 12:07:47 -0500
commit: 0222e024fef6c52fc1cd01b6c7ec01ab38031bce (patch)
tree: f7f09e1a44d48a7b7e92c589317e1ca3c5fa0049
parent: 5bd0edbbe10364d085ff0d314b2513e0e9fbf162 (diff)
3 files changed, 51 insertions, 20 deletions
diff --git a/grep/src/literals.rs b/grep/src/literals.rs
index 4cd34a87..d931f135 100644
--- a/grep/src/literals.rs
+++ b/grep/src/literals.rs
@@ -9,7 +9,7 @@ principled.
 */
 use std::cmp;
 
-use regex::bytes::Regex;
+use regex::bytes::RegexBuilder;
 use syntax::{
     Expr, Literals, Lit,
     ByteClass, ByteRange, CharClass, ClassRange, Repeater,
@@ -33,7 +33,7 @@ impl LiteralSets {
         }
     }
 
-    pub fn to_regex(&self) -> Option<Regex> {
+    pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
         if self.prefixes.all_complete() && !self.prefixes.is_empty() {
             debug!("literal prefixes detected: {:?}", self.prefixes);
             // When this is true, the regex engine will do a literal scan.
@@ -79,14 +79,12 @@ impl LiteralSets {
             debug!("required literals found: {:?}", req_lits);
             let alts: Vec<String> =
                 req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
-            // Literals always compile.
-            Some(Regex::new(&alts.join("|")).unwrap())
+            Some(RegexBuilder::new(&alts.join("|")))
         } else if lit.is_empty() {
             None
         } else {
-            // Literals always compile.
             debug!("required literal found: {:?}", show(lit));
-            Some(Regex::new(&bytes_to_regex(lit)).unwrap())
+            Some(RegexBuilder::new(&bytes_to_regex(lit)))
         }
     }
 }
diff --git a/grep/src/search.rs b/grep/src/search.rs
index c7639937..4e4c48e9 100644
--- a/grep/src/search.rs
+++ b/grep/src/search.rs
@@ -144,14 +144,19 @@ impl GrepBuilder {
         let expr = try!(self.parse());
         let literals = LiteralSets::create(&expr);
         let re = try!(self.regex(&expr));
-        let required = literals.to_regex().or_else(|| {
-            let expr = match strip_unicode_word_boundaries(&expr) {
-                None => return None,
-                Some(expr) => expr,
-            };
-            debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
-            self.regex(&expr).ok()
-        });
+        let required = match literals.to_regex_builder() {
+            Some(builder) => Some(try!(self.regex_build(builder))),
+            None => {
+                match strip_unicode_word_boundaries(&expr) {
+                    None => None,
+                    Some(expr) => {
+                        debug!("Stripped Unicode word boundaries. \
+                                New AST:\n{:?}", expr);
+                        self.regex(&expr).ok()
+                    }
+                }
+            }
+        };
         Ok(Grep {
             re: re,
             required: required,
@@ -162,11 +167,12 @@ impl GrepBuilder {
     /// Creates a new regex from the given expression with the current
     /// configuration.
     fn regex(&self, expr: &Expr) -> Result<Regex> {
-        let casei =
-            self.opts.case_insensitive
-            || (self.opts.case_smart && !has_uppercase_literal(expr));
-        RegexBuilder::new(&expr.to_string())
-            .case_insensitive(casei)
+        self.regex_build(RegexBuilder::new(&expr.to_string()))
+    }
+
+    /// Builds a new regex from the given builder using the caller's settings.
+    fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
+        builder
             .multi_line(true)
             .unicode(true)
             .size_limit(self.opts.size_limit)
@@ -182,12 +188,30 @@ impl GrepBuilder {
             try!(syntax::ExprBuilder::new()
                  .allow_bytes(true)
                  .unicode(true)
-                 .case_insensitive(self.opts.case_insensitive)
+                 .case_insensitive(try!(self.is_case_insensitive()))
                  .parse(&self.pattern));
         let expr = try!(nonl::remove(expr, self.opts.line_terminator));
         debug!("regex ast:\n{:#?}", expr);
         Ok(expr)
     }
+
+    /// Determines whether the case insensitive flag should be enabled or not.
+    ///
+    /// An error is returned if the regex could not be parsed.
+    fn is_case_insensitive(&self) -> Result<bool> {
+        if self.opts.case_insensitive {
+            return Ok(true);
+        }
+        if !self.opts.case_smart {
+            return Ok(false);
+        }
+        let expr =
+            try!(syntax::ExprBuilder::new()
+                 .allow_bytes(true)
+                 .unicode(true)
+                 .parse(&self.pattern));
+        Ok(!has_uppercase_literal(&expr))
+    }
 }
 
 impl Grep {
diff --git a/tests/tests.rs b/tests/tests.rs
index 48fb8185..bf6f471d 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
     assert_eq!(lines, "baz:test\n");
 });
 
+// See: https://github.com/BurntSushi/ripgrep/issues/199
+clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
+    wd.create("foo", "tEsT");
+    cmd.arg("--smart-case");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq!(lines, "foo:tEsT\n");
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/206
 clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
     wd.create_dir("foo");
author	Andrew Gallant <jamslam@gmail.com>	2016-11-06 12:07:47 -0500
committer	Andrew Gallant <jamslam@gmail.com>	2016-11-06 12:07:47 -0500
commit	0222e024fef6c52fc1cd01b6c7ec01ab38031bce (patch)
tree	f7f09e1a44d48a7b7e92c589317e1ca3c5fa0049
parent	5bd0edbbe10364d085ff0d314b2513e0e9fbf162 (diff)