summaryrefslogtreecommitdiffstats
path: root/grep-pcre2
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2018-04-29 09:29:52 -0400
committerAndrew Gallant <jamslam@gmail.com>2018-08-20 07:10:19 -0400
commitd9ca5293569efb255608d3c601107bcfe7060f15 (patch)
tree7fd8611c333a2f7d703987de3a379ee8292013e2 /grep-pcre2
parent0958837ee104985412f08e81b6f08df1e5291042 (diff)
libripgrep: initial commit introducing libripgrep
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation. Closes #162
Diffstat (limited to 'grep-pcre2')
-rw-r--r--grep-pcre2/Cargo.toml17
-rw-r--r--grep-pcre2/LICENSE-MIT21
-rw-r--r--grep-pcre2/README.md39
-rw-r--r--grep-pcre2/UNLICENSE24
-rw-r--r--grep-pcre2/src/error.rs59
-rw-r--r--grep-pcre2/src/lib.rs15
-rw-r--r--grep-pcre2/src/matcher.rs425
7 files changed, 600 insertions, 0 deletions
diff --git a/grep-pcre2/Cargo.toml b/grep-pcre2/Cargo.toml
new file mode 100644
index 00000000..28d1f167
--- /dev/null
+++ b/grep-pcre2/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "grep-pcre2"
+version = "0.0.1" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+Use PCRE2 with the 'grep' crate.
+"""
+documentation = "https://docs.rs/grep-pcre2"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
+readme = "README.md"
+keywords = ["regex", "grep", "pcre", "backreference", "look"]
+license = "Unlicense/MIT"
+
+[dependencies]
+grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
+pcre2 = "0.1"
diff --git a/grep-pcre2/LICENSE-MIT b/grep-pcre2/LICENSE-MIT
new file mode 100644
index 00000000..3b0a5dc0
--- /dev/null
+++ b/grep-pcre2/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/grep-pcre2/README.md b/grep-pcre2/README.md
new file mode 100644
index 00000000..7b904256
--- /dev/null
+++ b/grep-pcre2/README.md
@@ -0,0 +1,39 @@
+grep-pcre2
+----------
+The `grep-pcre2` crate provides an implementation of the `Matcher` trait from
+the `grep-matcher` crate. This implementation permits PCRE2 to be used in the
+`grep` crate for fast line oriented searching.
+
+[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
+[![](https://img.shields.io/crates/v/grep-pcre2.svg)](https://crates.io/crates/grep-pcre2)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+### Documentation
+
+[https://docs.rs/grep-pcre2](https://docs.rs/grep-pcre2)
+
+**NOTE:** You probably don't want to use this crate directly. Instead, you
+should prefer the facade defined in the
+[`grep`](https://docs.rs/grep)
+crate.
+
+If you're looking to just use PCRE2 from Rust, then you probably want the
+[`pcre2`](https://docs.rs/pcre2)
+crate, which provide high level safe bindings to PCRE2.
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+grep-pcre2 = "0.1"
+```
+
+and this to your crate root:
+
+```rust
+extern crate grep_pcre2;
+```
diff --git a/grep-pcre2/UNLICENSE b/grep-pcre2/UNLICENSE
new file mode 100644
index 00000000..68a49daa
--- /dev/null
+++ b/grep-pcre2/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/grep-pcre2/src/error.rs b/grep-pcre2/src/error.rs
new file mode 100644
index 00000000..7d0b17bb
--- /dev/null
+++ b/grep-pcre2/src/error.rs
@@ -0,0 +1,59 @@
+use std::error;
+use std::fmt;
+
+/// An error that can occur in this crate.
+///
+/// Generally, this error corresponds to problems building a regular
+/// expression, whether it's in parsing, compilation or a problem with
+/// guaranteeing a configured optimization.
+#[derive(Clone, Debug)]
+pub struct Error {
+ kind: ErrorKind,
+}
+
+impl Error {
+ pub(crate) fn regex<E: error::Error>(err: E) -> Error {
+ Error { kind: ErrorKind::Regex(err.to_string()) }
+ }
+
+ /// Return the kind of this error.
+ pub fn kind(&self) -> &ErrorKind {
+ &self.kind
+ }
+}
+
+/// The kind of an error that can occur.
+#[derive(Clone, Debug)]
+pub enum ErrorKind {
+ /// An error that occurred as a result of parsing a regular expression.
+ /// This can be a syntax error or an error that results from attempting to
+ /// compile a regular expression that is too big.
+ ///
+ /// The string here is the underlying error converted to a string.
+ Regex(String),
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl error::Error for Error {
+ fn description(&self) -> &str {
+ match self.kind {
+ ErrorKind::Regex(_) => "regex error",
+ ErrorKind::__Nonexhaustive => unreachable!(),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.kind {
+ ErrorKind::Regex(ref s) => write!(f, "{}", s),
+ ErrorKind::__Nonexhaustive => unreachable!(),
+ }
+ }
+}
diff --git a/grep-pcre2/src/lib.rs b/grep-pcre2/src/lib.rs
new file mode 100644
index 00000000..245ceab7
--- /dev/null
+++ b/grep-pcre2/src/lib.rs
@@ -0,0 +1,15 @@
+/*!
+An implementation of `grep-matcher`'s `Matcher` trait for
+[PCRE2](https://www.pcre.org/).
+*/
+
+#![deny(missing_docs)]
+
+extern crate grep_matcher;
+extern crate pcre2;
+
+pub use error::{Error, ErrorKind};
+pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
+
+mod error;
+mod matcher;
diff --git a/grep-pcre2/src/matcher.rs b/grep-pcre2/src/matcher.rs
new file mode 100644
index 00000000..e9c51be2
--- /dev/null
+++ b/grep-pcre2/src/matcher.rs
@@ -0,0 +1,425 @@
+use std::collections::HashMap;
+
+use grep_matcher::{Captures, Match, Matcher};
+use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
+
+use error::Error;
+
+/// A builder for configuring the compilation of a PCRE2 regex.
+#[derive(Clone, Debug)]
+pub struct RegexMatcherBuilder {
+ builder: RegexBuilder,
+ case_smart: bool,
+ word: bool,
+}
+
+impl RegexMatcherBuilder {
+ /// Create a new matcher builder with a default configuration.
+ pub fn new() -> RegexMatcherBuilder {
+ RegexMatcherBuilder {
+ builder: RegexBuilder::new(),
+ case_smart: false,
+ word: false,
+ }
+ }
+
+ /// Compile the given pattern into a PCRE matcher using the current
+ /// configuration.
+ ///
+ /// If there was a problem compiling the pattern, then an error is
+ /// returned.
+ pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
+ let mut builder = self.builder.clone();
+ if self.case_smart && !has_uppercase_literal(pattern) {
+ builder.caseless(true);
+ }
+ let res =
+ if self.word {
+ let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
+ builder.build(&pattern)
+ } else {
+ builder.build(pattern)
+ };
+ res.map_err(Error::regex).map(|regex| {
+ let mut names = HashMap::new();
+ for (i, name) in regex.capture_names().iter().enumerate() {
+ if let Some(ref name) = *name {
+ names.insert(name.to_string(), i);
+ }
+ }
+ RegexMatcher { regex, names }
+ })
+ }
+
+ /// Enables case insensitive matching.
+ ///
+ /// If the `utf` option is also set, then Unicode case folding is used
+ /// to determine case insensitivity. When the `utf` option is not set,
+ /// then only standard ASCII case insensitivity is considered.
+ ///
+ /// This option corresponds to the `i` flag.
+ pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.caseless(yes);
+ self
+ }
+
+ /// Whether to enable "smart case" or not.
+ ///
+ /// When smart case is enabled, the builder will automatically enable
+ /// case insensitive matching based on how the pattern is written. Namely,
+ /// case insensitive mode is enabled when both of the following things
+ /// are believed to be true:
+ ///
+ /// 1. The pattern contains at least one literal character. For example,
+ /// `a\w` contains a literal (`a`) but `\w` does not.
+ /// 2. Of the literals in the pattern, none of them are considered to be
+ /// uppercase according to Unicode. For example, `foo\pL` has no
+ /// uppercase literals but `Foo\pL` does.
+ ///
+ /// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
+ /// will prevent case insensitive matching even though it is part of a meta
+ /// sequence. This bug will probably never be fixed.
+ pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.case_smart = yes;
+ self
+ }
+
+ /// Enables "dot all" matching.
+ ///
+ /// When enabled, the `.` metacharacter in the pattern matches any
+ /// character, include `\n`. When disabled (the default), `.` will match
+ /// any character except for `\n`.
+ ///
+ /// This option corresponds to the `s` flag.
+ pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.dotall(yes);
+ self
+ }
+
+ /// Enable "extended" mode in the pattern, where whitespace is ignored.
+ ///
+ /// This option corresponds to the `x` flag.
+ pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.extended(yes);
+ self
+ }
+
+ /// Enable multiline matching mode.
+ ///
+ /// When enabled, the `^` and `$` anchors will match both at the beginning
+ /// and end of a subject string, in addition to matching at the start of
+ /// a line and the end of a line. When disabled, the `^` and `$` anchors
+ /// will only match at the beginning and end of a subject string.
+ ///
+ /// This option corresponds to the `m` flag.
+ pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.multi_line(yes);
+ self
+ }
+
+ /// Enable matching of CRLF as a line terminator.
+ ///
+ /// When enabled, anchors such as `^` and `$` will match any of the
+ /// following as a line terminator: `\r`, `\n` or `\r\n`.
+ ///
+ /// This is disabled by default, in which case, only `\n` is recognized as
+ /// a line terminator.
+ pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.crlf(yes);
+ self
+ }
+
+ /// Require that all matches occur on word boundaries.
+ ///
+ /// Enabling this option is subtly different than putting `\b` assertions
+ /// on both sides of your pattern. In particular, a `\b` assertion requires
+ /// that one side of it match a word character while the other match a
+ /// non-word character. This option, in contrast, merely requires that
+ /// one side match a non-word character.
+ ///
+ /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
+ /// word character. However, `-2` with this `word` option enabled will
+ /// match the `-2` in `foo -2 bar`.
+ pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.word = yes;
+ self
+ }
+
+ /// Enable Unicode matching mode.
+ ///
+ /// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
+ /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
+ ///
+ /// When set, this implies UTF matching mode. It is not possible to enable
+ /// Unicode matching mode without enabling UTF matching mode.
+ ///
+ /// This is disabled by default.
+ pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.ucp(yes);
+ self
+ }
+
+ /// Enable UTF matching mode.
+ ///
+ /// When enabled, characters are treated as sequences of code units that
+ /// make up a single codepoint instead of as single bytes. For example,
+ /// this will cause `.` to match any single UTF-8 encoded codepoint, where
+ /// as when this is disabled, `.` will any single byte (except for `\n` in
+ /// both cases, unless "dot all" mode is enabled).
+ ///
+ /// Note that when UTF matching mode is enabled, every search performed
+ /// will do a UTF-8 validation check, which can impact performance. The
+ /// UTF-8 check can be disabled via the `disable_utf_check` option, but it
+ /// is undefined behavior to enable UTF matching mode and search invalid
+ /// UTF-8.
+ ///
+ /// This is disabled by default.
+ pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.utf(yes);
+ self
+ }
+
+ /// When UTF matching mode is enabled, this will disable the UTF checking
+ /// that PCRE2 will normally perform automatically. If UTF matching mode
+ /// is not enabled, then this has no effect.
+ ///
+ /// UTF checking is enabled by default when UTF matching mode is enabled.
+ /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
+ /// will return an error if you attempt to search a subject string that is
+ /// not valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// It is undefined behavior to disable the UTF check in UTF matching mode
+ /// and search a subject string that is not valid UTF-8. When the UTF check
+ /// is disabled, callers must guarantee that the subject string is valid
+ /// UTF-8.
+ pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
+ self.builder.disable_utf_check();
+ self
+ }
+
+ /// Enable PCRE2's JIT.
+ ///
+ /// This generally speeds up matching quite a bit. The downside is that it
+ /// can increase the time it takes to compile a pattern.
+ ///
+ /// This is disabled by default.
+ pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.jit(yes);
+ self
+ }
+}
+
+/// An implementation of the `Matcher` trait using PCRE2.
+#[derive(Clone, Debug)]
+pub struct RegexMatcher {
+ regex: Regex,
+ names: HashMap<String, usize>,
+}
+
+impl RegexMatcher {
+ /// Create a new matcher from the given pattern using the default
+ /// configuration.
+ pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
+ RegexMatcherBuilder::new().build(pattern)
+ }
+}
+
+impl Matcher for RegexMatcher {
+ type Captures = RegexCaptures;
+ type Error = Error;
+
+ fn find_at(
+ &self,
+ haystack: &[u8],
+ at: usize,
+ ) -> Result<Option<Match>, Error> {
+ Ok(self.regex
+ .find_at(haystack, at)
+ .map_err(Error::regex)?
+ .map(|m| Match::new(m.start(), m.end())))
+ }
+
+ fn new_captures(&self) -> Result<RegexCaptures, Error> {
+ Ok(RegexCaptures::new(self.regex.capture_locations()))
+ }
+
+ fn capture_count(&self) -> usize {
+ self.regex.captures_len()
+ }
+
+ fn capture_index(&self, name: &str) -> Option<usize> {
+ self.names.get(name).map(|i| *i)
+ }
+
+ fn try_find_iter<F, E>(
+ &self,
+ haystack: &[u8],
+ mut matched: F,
+ ) -> Result<Result<(), E>, Error>
+ where F: FnMut(Match) -> Result<bool, E>
+ {
+ for result in self.regex.find_iter(haystack) {
+ let m = result.map_err(Error::regex)?;
+ match matched(Match::new(m.start(), m.end())) {
+ Ok(true) => continue,
+ Ok(false) => return Ok(Ok(())),
+ Err(err) => return Ok(Err(err)),
+ }
+ }
+ Ok(Ok(()))
+ }
+
+ fn captures_at(
+ &self,
+ haystack: &[u8],
+ at: usize,
+ caps: &mut RegexCaptures,
+ ) -> Result<bool, Error> {
+ Ok(self.regex
+ .captures_read_at(&mut caps.locs, haystack, at)
+ .map_err(Error::regex)?
+ .is_some())
+ }
+}
+
+/// Represents the match offsets of each capturing group in a match.
+///
+/// The first, or `0`th capture group, always corresponds to the entire match
+/// and is guaranteed to be present when a match occurs. The next capture
+/// group, at index `1`, corresponds to the first capturing group in the regex,
+/// ordered by the position at which the left opening parenthesis occurs.
+///
+/// Note that not all capturing groups are guaranteed to be present in a match.
+/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
+/// or `bar` will ever be set in any given match.
+///
+/// In order to access a capture group by name, you'll need to first find the
+/// index of the group using the corresponding matcher's `capture_index`
+/// method, and then use that index with `RegexCaptures::get`.
+#[derive(Clone, Debug)]
+pub struct RegexCaptures {
+ /// Where the locations are stored.
+ locs: CaptureLocations,
+}
+
+impl Captures for RegexCaptures {
+ fn len(&self) -> usize {
+ self.locs.len()
+ }
+
+ fn get(&self, i: usize) -> Option<Match> {
+ self.locs.get(i).map(|(s, e)| Match::new(s, e))
+ }
+}
+
+impl RegexCaptures {
+ pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
+ RegexCaptures { locs }
+ }
+}
+
+/// Determine whether the pattern contains an uppercase character which should
+/// negate the effect of the smart-case option.
+///
+/// Ideally we would be able to check the AST in order to correctly handle
+/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
+/// cased), but PCRE doesn't expose enough details for that kind of analysis.
+/// For now, our 'good enough' solution is to simply perform a semi-naïve
+/// scan of the input pattern and ignore all characters following a '\'. The
+/// This at least lets us support the most common cases, like 'foo\w' and
+/// 'foo\S', in an intuitive manner.
+fn has_uppercase_literal(pattern: &str) -> bool {
+ let mut chars = pattern.chars();
+ while let Some(c) = chars.next() {
+ if c == '\\' {
+ chars.next();
+ } else if c.is_uppercase() {
+ return true;
+ }
+ }
+ false
+}
+
+#[cfg(test)]
+mod tests {
+ use grep_matcher::{LineMatchKind, Matcher};
+ use super::*;
+
+ // Test that enabling word matches does the right thing and demonstrate
+ // the difference between it and surrounding the regex in `\b`.
+ #[test]
+ fn word() {
+ let matcher = RegexMatcherBuilder::new()
+ .word(true)
+ .build(r"-2")
+ .unwrap();
+ assert!(matcher.is_match(b"abc -2 foo").unwrap());
+
+ let matcher = RegexMatcherBuilder::new()
+ .word(false)
+ .build(r"\b-2\b")
+ .unwrap();
+ assert!(!matcher.is_match(b"abc -2 foo").unwrap());
+ }
+
+ // Test that enabling CRLF permits `$` to match at the end of a line.
+ #[test]
+ fn line_terminator_crlf() {
+ // Test normal use of `$` with a `\n` line terminator.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(matcher.is_match(b"abc\n").unwrap());
+
+ // Test that `$` doesn't match at `\r\n` boundary normally.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(!matcher.is_match(b"abc\r\n").unwrap());
+
+ // Now check the CRLF handling.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .crlf(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(matcher.is_match(b"abc\r\n").unwrap());
+ }
+
+ // Test that smart case works.
+ #[test]
+ fn case_smart() {
+ let matcher = RegexMatcherBuilder::new()
+ .case_smart(true)
+ .build(r"abc")
+ .unwrap();
+ assert!(matcher.is_match(b"ABC").unwrap());
+
+ let matcher = RegexMatcherBuilder::new()
+ .case_smart(true)
+ .build(r"aBc")
+ .unwrap();
+ assert!(!matcher.is_match(b"ABC").unwrap());
+ }
+
+ // Test that finding candidate lines works as expected.
+ #[test]
+ fn candidate_lines() {
+ fn is_confirmed(m: LineMatchKind) -> bool {
+ match m {
+ LineMatchKind::Confirmed(_) => true,
+ _ => false,
+ }
+ }
+
+ let matcher = RegexMatcherBuilder::new()
+ .build(r"\wfoo\s")
+ .unwrap();
+ let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
+ assert!(is_confirmed(m));
+ }
+}