summary | refs | log | tree | commit | diff | stats
path: root/grep/src/search.rs
diff options
context:
space:
mode:
Diffstat (limited to 'grep/src/search.rs')
-rw-r--r-- grep/src/search.rs 58
1 files changed, 9 insertions, 49 deletions
diff --git a/grep/src/search.rs b/grep/src/search.rs
index 1d5d7e29..49ddf1f8 100644
--- a/grep/src/search.rs
+++ b/grep/src/search.rs
@@ -1,10 +1,11 @@
use memchr::{memchr, memrchr};
+use syntax::ParserBuilder;
+use syntax::hir::Hir;
use regex::bytes::{Regex, RegexBuilder};
use literals::LiteralSets;
use nonl;
-use syntax::ParserBuilder;
-use syntax::hir::Hir;
+use smart_case::Cased;
use word_boundary::strip_unicode_word_boundaries;
use Result;
@@ -205,7 +206,11 @@ impl GrepBuilder {
if !self.opts.case_smart {
return Ok(false);
}
- Ok(!has_uppercase_literal(&self.pattern))
+ let cased = match Cased::from_pattern(&self.pattern) {
+ None => return Ok(false),
+ Some(cased) => cased,
+ };
+ Ok(cased.any_literal && !cased.any_uppercase)
}
}
@@ -311,44 +316,15 @@ impl<'b, 's> Iterator for Iter<'b, 's> {
}
}
-/// Determine whether the pattern contains an uppercase character which should
-/// negate the effect of the smart-case option.
-///
-/// Ideally we would be able to check the AST in order to correctly handle
-/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
-/// cased), but we don't currently have that option. For now, our 'good enough'
-/// solution is to simply perform a semi-naïve scan of the input pattern and
-/// ignore all characters following a '\'. The ExprBuilder will handle any
-/// actual errors, and this at least lets us support the most common cases,
-/// like 'foo\w' and 'foo\S', in an intuitive manner.
-fn has_uppercase_literal(pattern: &str) -> bool {
- let mut chars = pattern.chars();
- while let Some(c) = chars.next() {
- if c == '\\' {
- chars.next();
- } else if c.is_uppercase() {
- return true;
- }
- }
- false
-}
-
#[cfg(test)]
mod tests {
- #![allow(unused_imports)]
-
use memchr::{memchr, memrchr};
use regex::bytes::Regex;
- use super::{GrepBuilder, Match, has_uppercase_literal};
+ use super::{GrepBuilder, Match};
static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");
- #[allow(dead_code)]
- fn s(bytes: &[u8]) -> String {
- String::from_utf8(bytes.to_vec()).unwrap()
- }
-
fn find_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
let re = Regex::new(pat).unwrap();
let mut lines = vec![];
@@ -377,20 +353,4 @@ mod tests {
assert_eq!(expected.len(), got.len());
assert_eq!(expected, got);
}
-
- #[test]
- fn pattern_case() {
- assert_eq!(has_uppercase_literal(&"".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo".to_string()), false);
- assert_eq!(has_uppercase_literal(&"Foo".to_string()), true);
- assert_eq!(has_uppercase_literal(&"foO".to_string()), true);
- assert_eq!(has_uppercase_literal(&"foo\\\\".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo\\w".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo\\S".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo\\p{Ll}".to_string()), true);
- assert_eq!(has_uppercase_literal(&"foo[a-z]".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo[A-Z]".to_string()), true);
- assert_eq!(has_uppercase_literal(&"foo[\\S\\t]".to_string()), false);
- assert_eq!(has_uppercase_literal(&"foo\\\\S".to_string()), true);
- }
}