summaryrefslogtreecommitdiffstats
path: root/crates/pcre2
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2020-02-17 18:19:19 -0500
committerAndrew Gallant <jamslam@gmail.com>2020-02-17 19:24:53 -0500
commitfdd8510fdda6109c562e479c718d42c8ecc26263 (patch)
treef510c952f62d0ebdcbf689000b2d7aebc90d5ba7 /crates/pcre2
parent0bc4f0447b05468f043e06278a3ca2b1c5646f9b (diff)
repo: move all source code in crates directory
The top-level listing was just getting a bit too long for my taste. So put all of the code in one directory and shrink the large top-level mess to a small top-level mess. NOTE: This commit only contains renames. The subsequent commit will actually make ripgrep build again. We do it this way with the naive hope that this will make it easier for git history to track the renames. Sigh.
Diffstat (limited to 'crates/pcre2')
-rw-r--r--crates/pcre2/Cargo.toml17
-rw-r--r--crates/pcre2/LICENSE-MIT21
-rw-r--r--crates/pcre2/README.md39
-rw-r--r--crates/pcre2/UNLICENSE24
-rw-r--r--crates/pcre2/src/error.rs59
-rw-r--r--crates/pcre2/src/lib.rs16
-rw-r--r--crates/pcre2/src/matcher.rs456
7 files changed, 632 insertions, 0 deletions
diff --git a/crates/pcre2/Cargo.toml b/crates/pcre2/Cargo.toml
new file mode 100644
index 00000000..8b907106
--- /dev/null
+++ b/crates/pcre2/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "grep-pcre2"
+version = "0.1.3" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+Use PCRE2 with the 'grep' crate.
+"""
+documentation = "https://docs.rs/grep-pcre2"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
+readme = "README.md"
+keywords = ["regex", "grep", "pcre", "backreference", "look"]
+license = "Unlicense/MIT"
+
+[dependencies]
+grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
+pcre2 = "0.2.0"
diff --git a/crates/pcre2/LICENSE-MIT b/crates/pcre2/LICENSE-MIT
new file mode 100644
index 00000000..3b0a5dc0
--- /dev/null
+++ b/crates/pcre2/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/crates/pcre2/README.md b/crates/pcre2/README.md
new file mode 100644
index 00000000..7b904256
--- /dev/null
+++ b/crates/pcre2/README.md
@@ -0,0 +1,39 @@
+grep-pcre2
+----------
+The `grep-pcre2` crate provides an implementation of the `Matcher` trait from
+the `grep-matcher` crate. This implementation permits PCRE2 to be used in the
+`grep` crate for fast line oriented searching.
+
+[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
+[![](https://img.shields.io/crates/v/grep-pcre2.svg)](https://crates.io/crates/grep-pcre2)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+### Documentation
+
+[https://docs.rs/grep-pcre2](https://docs.rs/grep-pcre2)
+
+**NOTE:** You probably don't want to use this crate directly. Instead, you
+should prefer the facade defined in the
+[`grep`](https://docs.rs/grep)
+crate.
+
+If you're looking to just use PCRE2 from Rust, then you probably want the
+[`pcre2`](https://docs.rs/pcre2)
+crate, which provide high level safe bindings to PCRE2.
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+grep-pcre2 = "0.1"
+```
+
+and this to your crate root:
+
+```rust
+extern crate grep_pcre2;
+```
diff --git a/crates/pcre2/UNLICENSE b/crates/pcre2/UNLICENSE
new file mode 100644
index 00000000..68a49daa
--- /dev/null
+++ b/crates/pcre2/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/crates/pcre2/src/error.rs b/crates/pcre2/src/error.rs
new file mode 100644
index 00000000..7d0b17bb
--- /dev/null
+++ b/crates/pcre2/src/error.rs
@@ -0,0 +1,59 @@
+use std::error;
+use std::fmt;
+
+/// An error that can occur in this crate.
+///
+/// Generally, this error corresponds to problems building a regular
+/// expression, whether it's in parsing, compilation or a problem with
+/// guaranteeing a configured optimization.
+#[derive(Clone, Debug)]
+pub struct Error {
+ kind: ErrorKind,
+}
+
+impl Error {
+ pub(crate) fn regex<E: error::Error>(err: E) -> Error {
+ Error { kind: ErrorKind::Regex(err.to_string()) }
+ }
+
+ /// Return the kind of this error.
+ pub fn kind(&self) -> &ErrorKind {
+ &self.kind
+ }
+}
+
+/// The kind of an error that can occur.
+#[derive(Clone, Debug)]
+pub enum ErrorKind {
+ /// An error that occurred as a result of parsing a regular expression.
+ /// This can be a syntax error or an error that results from attempting to
+ /// compile a regular expression that is too big.
+ ///
+ /// The string here is the underlying error converted to a string.
+ Regex(String),
+ /// Hints that destructuring should not be exhaustive.
+ ///
+ /// This enum may grow additional variants, so this makes sure clients
+ /// don't count on exhaustive matching. (Otherwise, adding a new variant
+ /// could break existing code.)
+ #[doc(hidden)]
+ __Nonexhaustive,
+}
+
+impl error::Error for Error {
+ fn description(&self) -> &str {
+ match self.kind {
+ ErrorKind::Regex(_) => "regex error",
+ ErrorKind::__Nonexhaustive => unreachable!(),
+ }
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self.kind {
+ ErrorKind::Regex(ref s) => write!(f, "{}", s),
+ ErrorKind::__Nonexhaustive => unreachable!(),
+ }
+ }
+}
diff --git a/crates/pcre2/src/lib.rs b/crates/pcre2/src/lib.rs
new file mode 100644
index 00000000..d7cb0156
--- /dev/null
+++ b/crates/pcre2/src/lib.rs
@@ -0,0 +1,16 @@
+/*!
+An implementation of `grep-matcher`'s `Matcher` trait for
+[PCRE2](https://www.pcre.org/).
+*/
+
+#![deny(missing_docs)]
+
+extern crate grep_matcher;
+extern crate pcre2;
+
+pub use error::{Error, ErrorKind};
+pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
+pub use pcre2::{is_jit_available, version};
+
+mod error;
+mod matcher;
diff --git a/crates/pcre2/src/matcher.rs b/crates/pcre2/src/matcher.rs
new file mode 100644
index 00000000..a921c91b
--- /dev/null
+++ b/crates/pcre2/src/matcher.rs
@@ -0,0 +1,456 @@
+use std::collections::HashMap;
+
+use grep_matcher::{Captures, Match, Matcher};
+use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
+
+use error::Error;
+
+/// A builder for configuring the compilation of a PCRE2 regex.
+#[derive(Clone, Debug)]
+pub struct RegexMatcherBuilder {
+ builder: RegexBuilder,
+ case_smart: bool,
+ word: bool,
+}
+
+impl RegexMatcherBuilder {
+ /// Create a new matcher builder with a default configuration.
+ pub fn new() -> RegexMatcherBuilder {
+ RegexMatcherBuilder {
+ builder: RegexBuilder::new(),
+ case_smart: false,
+ word: false,
+ }
+ }
+
+ /// Compile the given pattern into a PCRE matcher using the current
+ /// configuration.
+ ///
+ /// If there was a problem compiling the pattern, then an error is
+ /// returned.
+ pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
+ let mut builder = self.builder.clone();
+ if self.case_smart && !has_uppercase_literal(pattern) {
+ builder.caseless(true);
+ }
+ let res = if self.word {
+ let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
+ builder.build(&pattern)
+ } else {
+ builder.build(pattern)
+ };
+ res.map_err(Error::regex).map(|regex| {
+ let mut names = HashMap::new();
+ for (i, name) in regex.capture_names().iter().enumerate() {
+ if let Some(ref name) = *name {
+ names.insert(name.to_string(), i);
+ }
+ }
+ RegexMatcher { regex, names }
+ })
+ }
+
+ /// Enables case insensitive matching.
+ ///
+ /// If the `utf` option is also set, then Unicode case folding is used
+ /// to determine case insensitivity. When the `utf` option is not set,
+ /// then only standard ASCII case insensitivity is considered.
+ ///
+ /// This option corresponds to the `i` flag.
+ pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.caseless(yes);
+ self
+ }
+
+ /// Whether to enable "smart case" or not.
+ ///
+ /// When smart case is enabled, the builder will automatically enable
+ /// case insensitive matching based on how the pattern is written. Namely,
+ /// case insensitive mode is enabled when both of the following things
+ /// are believed to be true:
+ ///
+ /// 1. The pattern contains at least one literal character. For example,
+ /// `a\w` contains a literal (`a`) but `\w` does not.
+ /// 2. Of the literals in the pattern, none of them are considered to be
+ /// uppercase according to Unicode. For example, `foo\pL` has no
+ /// uppercase literals but `Foo\pL` does.
+ ///
+ /// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
+ /// will prevent case insensitive matching even though it is part of a meta
+ /// sequence. This bug will probably never be fixed.
+ pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.case_smart = yes;
+ self
+ }
+
+ /// Enables "dot all" matching.
+ ///
+ /// When enabled, the `.` metacharacter in the pattern matches any
+ /// character, include `\n`. When disabled (the default), `.` will match
+ /// any character except for `\n`.
+ ///
+ /// This option corresponds to the `s` flag.
+ pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.dotall(yes);
+ self
+ }
+
+ /// Enable "extended" mode in the pattern, where whitespace is ignored.
+ ///
+ /// This option corresponds to the `x` flag.
+ pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.extended(yes);
+ self
+ }
+
+ /// Enable multiline matching mode.
+ ///
+ /// When enabled, the `^` and `$` anchors will match both at the beginning
+ /// and end of a subject string, in addition to matching at the start of
+ /// a line and the end of a line. When disabled, the `^` and `$` anchors
+ /// will only match at the beginning and end of a subject string.
+ ///
+ /// This option corresponds to the `m` flag.
+ pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.multi_line(yes);
+ self
+ }
+
+ /// Enable matching of CRLF as a line terminator.
+ ///
+ /// When enabled, anchors such as `^` and `$` will match any of the
+ /// following as a line terminator: `\r`, `\n` or `\r\n`.
+ ///
+ /// This is disabled by default, in which case, only `\n` is recognized as
+ /// a line terminator.
+ pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.crlf(yes);
+ self
+ }
+
+ /// Require that all matches occur on word boundaries.
+ ///
+ /// Enabling this option is subtly different than putting `\b` assertions
+ /// on both sides of your pattern. In particular, a `\b` assertion requires
+ /// that one side of it match a word character while the other match a
+ /// non-word character. This option, in contrast, merely requires that
+ /// one side match a non-word character.
+ ///
+ /// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
+ /// word character. However, `-2` with this `word` option enabled will
+ /// match the `-2` in `foo -2 bar`.
+ pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.word = yes;
+ self
+ }
+
+ /// Enable Unicode matching mode.
+ ///
+ /// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
+ /// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
+ ///
+ /// When set, this implies UTF matching mode. It is not possible to enable
+ /// Unicode matching mode without enabling UTF matching mode.
+ ///
+ /// This is disabled by default.
+ pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.ucp(yes);
+ self
+ }
+
+ /// Enable UTF matching mode.
+ ///
+ /// When enabled, characters are treated as sequences of code units that
+ /// make up a single codepoint instead of as single bytes. For example,
+ /// this will cause `.` to match any single UTF-8 encoded codepoint, where
+ /// as when this is disabled, `.` will any single byte (except for `\n` in
+ /// both cases, unless "dot all" mode is enabled).
+ ///
+ /// Note that when UTF matching mode is enabled, every search performed
+ /// will do a UTF-8 validation check, which can impact performance. The
+ /// UTF-8 check can be disabled via the `disable_utf_check` option, but it
+ /// is undefined behavior to enable UTF matching mode and search invalid
+ /// UTF-8.
+ ///
+ /// This is disabled by default.
+ pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.utf(yes);
+ self
+ }
+
+ /// When UTF matching mode is enabled, this will disable the UTF checking
+ /// that PCRE2 will normally perform automatically. If UTF matching mode
+ /// is not enabled, then this has no effect.
+ ///
+ /// UTF checking is enabled by default when UTF matching mode is enabled.
+ /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
+ /// will return an error if you attempt to search a subject string that is
+ /// not valid UTF-8.
+ ///
+ /// # Safety
+ ///
+ /// It is undefined behavior to disable the UTF check in UTF matching mode
+ /// and search a subject string that is not valid UTF-8. When the UTF check
+ /// is disabled, callers must guarantee that the subject string is valid
+ /// UTF-8.
+ pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
+ self.builder.disable_utf_check();
+ self
+ }
+
+ /// Enable PCRE2's JIT and return an error if it's not available.
+ ///
+ /// This generally speeds up matching quite a bit. The downside is that it
+ /// can increase the time it takes to compile a pattern.
+ ///
+ /// If the JIT isn't available or if JIT compilation returns an error, then
+ /// regex compilation will fail with the corresponding error.
+ ///
+ /// This is disabled by default, and always overrides `jit_if_available`.
+ pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.jit(yes);
+ self
+ }
+
+ /// Enable PCRE2's JIT if it's available.
+ ///
+ /// This generally speeds up matching quite a bit. The downside is that it
+ /// can increase the time it takes to compile a pattern.
+ ///
+ /// If the JIT isn't available or if JIT compilation returns an error,
+ /// then a debug message with the error will be emitted and the regex will
+ /// otherwise silently fall back to non-JIT matching.
+ ///
+ /// This is disabled by default, and always overrides `jit`.
+ pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+ self.builder.jit_if_available(yes);
+ self
+ }
+
+ /// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
+ /// not enabled, then this has no effect.
+ ///
+ /// When `None` is given, no custom JIT stack will be created, and instead,
+ /// the default JIT stack is used. When the default is used, its maximum
+ /// size is 32 KB.
+ ///
+ /// When this is set, then a new JIT stack will be created with the given
+ /// maximum size as its limit.
+ ///
+ /// Increasing the stack size can be useful for larger regular expressions.
+ ///
+ /// By default, this is set to `None`.
+ pub fn max_jit_stack_size(
+ &mut self,
+ bytes: Option<usize>,
+ ) -> &mut RegexMatcherBuilder {
+ self.builder.max_jit_stack_size(bytes);
+ self
+ }
+}
+
+/// An implementation of the `Matcher` trait using PCRE2.
+#[derive(Clone, Debug)]
+pub struct RegexMatcher {
+ regex: Regex,
+ names: HashMap<String, usize>,
+}
+
+impl RegexMatcher {
+ /// Create a new matcher from the given pattern using the default
+ /// configuration.
+ pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
+ RegexMatcherBuilder::new().build(pattern)
+ }
+}
+
+impl Matcher for RegexMatcher {
+ type Captures = RegexCaptures;
+ type Error = Error;
+
+ fn find_at(
+ &self,
+ haystack: &[u8],
+ at: usize,
+ ) -> Result<Option<Match>, Error> {
+ Ok(self
+ .regex
+ .find_at(haystack, at)
+ .map_err(Error::regex)?
+ .map(|m| Match::new(m.start(), m.end())))
+ }
+
+ fn new_captures(&self) -> Result<RegexCaptures, Error> {
+ Ok(RegexCaptures::new(self.regex.capture_locations()))
+ }
+
+ fn capture_count(&self) -> usize {
+ self.regex.captures_len()
+ }
+
+ fn capture_index(&self, name: &str) -> Option<usize> {
+ self.names.get(name).map(|i| *i)
+ }
+
+ fn try_find_iter<F, E>(
+ &self,
+ haystack: &[u8],
+ mut matched: F,
+ ) -> Result<Result<(), E>, Error>
+ where
+ F: FnMut(Match) -> Result<bool, E>,
+ {
+ for result in self.regex.find_iter(haystack) {
+ let m = result.map_err(Error::regex)?;
+ match matched(Match::new(m.start(), m.end())) {
+ Ok(true) => continue,
+ Ok(false) => return Ok(Ok(())),
+ Err(err) => return Ok(Err(err)),
+ }
+ }
+ Ok(Ok(()))
+ }
+
+ fn captures_at(
+ &self,
+ haystack: &[u8],
+ at: usize,
+ caps: &mut RegexCaptures,
+ ) -> Result<bool, Error> {
+ Ok(self
+ .regex
+ .captures_read_at(&mut caps.locs, haystack, at)
+ .map_err(Error::regex)?
+ .is_some())
+ }
+}
+
+/// Represents the match offsets of each capturing group in a match.
+///
+/// The first, or `0`th capture group, always corresponds to the entire match
+/// and is guaranteed to be present when a match occurs. The next capture
+/// group, at index `1`, corresponds to the first capturing group in the regex,
+/// ordered by the position at which the left opening parenthesis occurs.
+///
+/// Note that not all capturing groups are guaranteed to be present in a match.
+/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
+/// or `bar` will ever be set in any given match.
+///
+/// In order to access a capture group by name, you'll need to first find the
+/// index of the group using the corresponding matcher's `capture_index`
+/// method, and then use that index with `RegexCaptures::get`.
+#[derive(Clone, Debug)]
+pub struct RegexCaptures {
+ /// Where the locations are stored.
+ locs: CaptureLocations,
+}
+
+impl Captures for RegexCaptures {
+ fn len(&self) -> usize {
+ self.locs.len()
+ }
+
+ fn get(&self, i: usize) -> Option<Match> {
+ self.locs.get(i).map(|(s, e)| Match::new(s, e))
+ }
+}
+
+impl RegexCaptures {
+ pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
+ RegexCaptures { locs }
+ }
+}
+
+/// Determine whether the pattern contains an uppercase character which should
+/// negate the effect of the smart-case option.
+///
+/// Ideally we would be able to check the AST in order to correctly handle
+/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
+/// cased), but PCRE doesn't expose enough details for that kind of analysis.
+/// For now, our 'good enough' solution is to simply perform a semi-naïve
+/// scan of the input pattern and ignore all characters following a '\'. The
+/// This at least lets us support the most common cases, like 'foo\w' and
+/// 'foo\S', in an intuitive manner.
+fn has_uppercase_literal(pattern: &str) -> bool {
+ let mut chars = pattern.chars();
+ while let Some(c) = chars.next() {
+ if c == '\\' {
+ chars.next();
+ } else if c.is_uppercase() {
+ return true;
+ }
+ }
+ false
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use grep_matcher::{LineMatchKind, Matcher};
+
+ // Test that enabling word matches does the right thing and demonstrate
+ // the difference between it and surrounding the regex in `\b`.
+ #[test]
+ fn word() {
+ let matcher =
+ RegexMatcherBuilder::new().word(true).build(r"-2").unwrap();
+ assert!(matcher.is_match(b"abc -2 foo").unwrap());
+
+ let matcher =
+ RegexMatcherBuilder::new().word(false).build(r"\b-2\b").unwrap();
+ assert!(!matcher.is_match(b"abc -2 foo").unwrap());
+ }
+
+ // Test that enabling CRLF permits `$` to match at the end of a line.
+ #[test]
+ fn line_terminator_crlf() {
+ // Test normal use of `$` with a `\n` line terminator.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(matcher.is_match(b"abc\n").unwrap());
+
+ // Test that `$` doesn't match at `\r\n` boundary normally.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(!matcher.is_match(b"abc\r\n").unwrap());
+
+ // Now check the CRLF handling.
+ let matcher = RegexMatcherBuilder::new()
+ .multi_line(true)
+ .crlf(true)
+ .build(r"abc$")
+ .unwrap();
+ assert!(matcher.is_match(b"abc\r\n").unwrap());
+ }
+
+ // Test that smart case works.
+ #[test]
+ fn case_smart() {
+ let matcher =
+ RegexMatcherBuilder::new().case_smart(true).build(r"abc").unwrap();
+ assert!(matcher.is_match(b"ABC").unwrap());
+
+ let matcher =
+ RegexMatcherBuilder::new().case_smart(true).build(r"aBc").unwrap();
+ assert!(!matcher.is_match(b"ABC").unwrap());
+ }
+
+ // Test that finding candidate lines works as expected.
+ #[test]
+ fn candidate_lines() {
+ fn is_confirmed(m: LineMatchKind) -> bool {
+ match m {
+ LineMatchKind::Confirmed(_) => true,
+ _ => false,
+ }
+ }
+
+ let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
+ let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
+ assert!(is_confirmed(m));
+ }
+}