diff options
author | Andrew Gallant <jamslam@gmail.com> | 2016-09-24 21:51:04 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2016-09-24 21:51:04 -0400 |
commit | 1595f0faf594be5d303b1783857d23a0fda74230 (patch) | |
tree | 054c7c5c6981a14bb1ce80512e9ed4690007ddc2 /grep | |
parent | 8eeb0c0b60da59828a48d995e969bdba5816ea31 (diff) |
Add --smart-case.
It does what it says on the tin.
Closes #70.
Diffstat (limited to 'grep')
-rw-r--r-- | grep/src/search.rs | 36 |
1 files changed, 35 insertions, 1 deletions
diff --git a/grep/src/search.rs b/grep/src/search.rs index 6bff2ba9..a91e5e4c 100644 --- a/grep/src/search.rs +++ b/grep/src/search.rs @@ -52,6 +52,7 @@ pub struct GrepBuilder { #[derive(Clone, Debug)] struct Options { case_insensitive: bool, + case_smart: bool, line_terminator: u8, size_limit: usize, dfa_size_limit: usize, @@ -61,6 +62,7 @@ impl Default for Options { fn default() -> Options { Options { case_insensitive: false, + case_smart: false, line_terminator: b'\n', size_limit: 10 * (1 << 20), dfa_size_limit: 10 * (1 << 20), @@ -98,6 +100,18 @@ impl GrepBuilder { self } + /// Whether to enable smart case search or not (disabled by default). + /// + /// Smart case uses case insensitive search if the regex contains only + /// lowercase literal characters. Otherwise, a case sensitive search is + /// used instead. + /// + /// Enabling the case_insensitive flag overrides this. + pub fn case_smart(mut self, yes: bool) -> GrepBuilder { + self.opts.case_smart = yes; + self + } + /// Set the approximate size limit of the compiled regular expression. /// /// This roughly corresponds to the number of bytes occupied by a @@ -148,8 +162,11 @@ impl GrepBuilder { /// Creates a new regex from the given expression with the current /// configuration. fn regex(&self, expr: &Expr) -> Result<Regex> { + let casei = + self.opts.case_insensitive + || (self.opts.case_smart && !has_uppercase_literal(expr)); RegexBuilder::new(&expr.to_string()) - .case_insensitive(self.opts.case_insensitive) + .case_insensitive(casei) .multi_line(true) .unicode(true) .size_limit(self.opts.size_limit) @@ -274,6 +291,23 @@ impl<'b, 's> Iterator for Iter<'b, 's> { } } +fn has_uppercase_literal(expr: &Expr) -> bool { + use syntax::Expr::*; + match *expr { + Literal { ref chars, casei } => { + casei || chars.iter().any(|c| c.is_uppercase()) + } + LiteralBytes { ref bytes, casei } => { + casei || bytes.iter().any(|&b| b'A' <= b && b <= b'Z') + } + Group { ref e, .. 
} => has_uppercase_literal(e), + Repeat { ref e, .. } => has_uppercase_literal(e), + Concat(ref es) => es.iter().any(has_uppercase_literal), + Alternate(ref es) => es.iter().any(has_uppercase_literal), + _ => false, + } +} + #[cfg(test)] mod tests { #![allow(unused_imports)] |