summaryrefslogtreecommitdiffstats
path: root/crates/globset
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2023-09-26 15:01:20 -0400
committerAndrew Gallant <jamslam@gmail.com>2023-10-09 20:29:52 -0400
commit7f456404010005142a6493fb48bd9fd06aca2731 (patch)
tree4ab69c918f3d186e7b911d7962e93acd40be34d4 /crates/globset
parent0951820f63a1e38d755b2905e389e09c9b569040 (diff)
globset: polishing
This brings the code in line with my current style. It also inlines the dozen or so lines of code for FNV hashing instead of bringing in a micro-crate for it. Finally, it drops the dependency on regex in favor of using regex-syntax and regex-automata directly.
Diffstat (limited to 'crates/globset')
-rw-r--r--crates/globset/Cargo.toml26
-rw-r--r--crates/globset/src/fnv.rs30
-rw-r--r--crates/globset/src/glob.rs115
-rw-r--r--crates/globset/src/lib.rs130
-rw-r--r--crates/globset/src/pathutil.rs20
5 files changed, 177 insertions, 144 deletions
diff --git a/crates/globset/Cargo.toml b/crates/globset/Cargo.toml
index 799224e8..75486ddb 100644
--- a/crates/globset/Cargo.toml
+++ b/crates/globset/Cargo.toml
@@ -13,24 +13,32 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense OR MIT"
-edition = "2018"
+edition = "2021"
[lib]
name = "globset"
bench = false
[dependencies]
-aho-corasick = "1.0.2"
-bstr = { version = "1.6.0", default-features = false, features = ["std"] }
-fnv = "1.0.6"
-log = { version = "0.4.5", optional = true }
-regex = { version = "1.8.3", default-features = false, features = ["perf", "std"] }
-serde = { version = "1.0.104", optional = true }
+aho-corasick = "1.1.1"
+bstr = { version = "1.6.2", default-features = false, features = ["std"] }
+log = { version = "0.4.20", optional = true }
+serde = { version = "1.0.188", optional = true }
+
+[dependencies.regex-syntax]
+version = "0.7.5"
+default-features = false
+features = ["std"]
+
+[dependencies.regex-automata]
+version = "0.3.8"
+default-features = false
+features = ["std", "perf", "syntax", "meta", "nfa", "hybrid"]
[dev-dependencies]
-glob = "0.3.0"
+glob = "0.3.1"
lazy_static = "1"
-serde_json = "1.0.45"
+serde_json = "1.0.107"
[features]
default = ["log"]
diff --git a/crates/globset/src/fnv.rs b/crates/globset/src/fnv.rs
new file mode 100644
index 00000000..91174e20
--- /dev/null
+++ b/crates/globset/src/fnv.rs
@@ -0,0 +1,30 @@
+/// A convenience alias for creating a hash map with an FNV hasher.
+pub(crate) type HashMap<K, V> =
+ std::collections::HashMap<K, V, std::hash::BuildHasherDefault<Hasher>>;
+
+/// A hasher that implements the Fowler–Noll–Vo (FNV) hash.
+pub(crate) struct Hasher(u64);
+
+impl Hasher {
+ const OFFSET_BASIS: u64 = 0xcbf29ce484222325;
+ const PRIME: u64 = 0x100000001b3;
+}
+
+impl Default for Hasher {
+ fn default() -> Hasher {
+ Hasher(Hasher::OFFSET_BASIS)
+ }
+}
+
+impl std::hash::Hasher for Hasher {
+ fn finish(&self) -> u64 {
+ self.0
+ }
+
+ fn write(&mut self, bytes: &[u8]) {
+ for &byte in bytes.iter() {
+ self.0 = self.0 ^ u64::from(byte);
+ self.0 = self.0.wrapping_mul(Hasher::PRIME);
+ }
+ }
+}
diff --git a/crates/globset/src/glob.rs b/crates/globset/src/glob.rs
index d19c70ed..83c08344 100644
--- a/crates/globset/src/glob.rs
+++ b/crates/globset/src/glob.rs
@@ -1,12 +1,6 @@
-use std::fmt;
-use std::hash;
-use std::iter;
-use std::ops::{Deref, DerefMut};
use std::path::{is_separator, Path};
-use std::str;
-use regex;
-use regex::bytes::Regex;
+use regex_automata::meta::Regex;
use crate::{new_regex, Candidate, Error, ErrorKind};
@@ -18,7 +12,7 @@ use crate::{new_regex, Candidate, Error, ErrorKind};
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
-pub enum MatchStrategy {
+pub(crate) enum MatchStrategy {
/// A pattern matches if and only if the entire file path matches this
/// literal string.
Literal(String),
@@ -53,7 +47,7 @@ pub enum MatchStrategy {
impl MatchStrategy {
/// Returns a matching strategy for the given pattern.
- pub fn new(pat: &Glob) -> MatchStrategy {
+ pub(crate) fn new(pat: &Glob) -> MatchStrategy {
if let Some(lit) = pat.basename_literal() {
MatchStrategy::BasenameLiteral(lit)
} else if let Some(lit) = pat.literal() {
@@ -63,7 +57,7 @@ impl MatchStrategy {
} else if let Some(prefix) = pat.prefix() {
MatchStrategy::Prefix(prefix)
} else if let Some((suffix, component)) = pat.suffix() {
- MatchStrategy::Suffix { suffix: suffix, component: component }
+ MatchStrategy::Suffix { suffix, component }
} else if let Some(ext) = pat.required_ext() {
MatchStrategy::RequiredExtension(ext)
} else {
@@ -90,20 +84,20 @@ impl PartialEq for Glob {
}
}
-impl hash::Hash for Glob {
- fn hash<H: hash::Hasher>(&self, state: &mut H) {
+impl std::hash::Hash for Glob {
+ fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
}
-impl fmt::Display for Glob {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for Glob {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.glob.fmt(f)
}
}
-impl str::FromStr for Glob {
+impl std::str::FromStr for Glob {
type Err = Error;
fn from_str(glob: &str) -> Result<Self, Self::Err> {
@@ -227,14 +221,14 @@ impl GlobOptions {
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);
-impl Deref for Tokens {
+impl std::ops::Deref for Tokens {
type Target = Vec<Token>;
fn deref(&self) -> &Vec<Token> {
&self.0
}
}
-impl DerefMut for Tokens {
+impl std::ops::DerefMut for Tokens {
fn deref_mut(&mut self) -> &mut Vec<Token> {
&mut self.0
}
@@ -262,7 +256,7 @@ impl Glob {
pub fn compile_matcher(&self) -> GlobMatcher {
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
- GlobMatcher { pat: self.clone(), re: re }
+ GlobMatcher { pat: self.clone(), re }
}
/// Returns a strategic matcher.
@@ -275,7 +269,7 @@ impl Glob {
let strategy = MatchStrategy::new(self);
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
- GlobStrategic { strategy: strategy, re: re }
+ GlobStrategic { strategy, re }
}
/// Returns the original glob pattern used to build this pattern.
@@ -311,10 +305,8 @@ impl Glob {
}
let mut lit = String::new();
for t in &*self.tokens {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if lit.is_empty() {
None
@@ -334,13 +326,12 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let start = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => 1,
- Some(_) => 0,
- _ => return None,
+ let start = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => 1,
+ _ => 0,
};
- match self.tokens.get(start) {
- Some(&Token::ZeroOrMore) => {
+ match *self.tokens.get(start)? {
+ Token::ZeroOrMore => {
// If there was no recursive prefix, then we only permit
// `*` if `*` can match a `/`. For example, if `*` can't
// match `/`, then `*.c` doesn't match `foo/bar.c`.
@@ -350,8 +341,8 @@ impl Glob {
}
_ => return None,
}
- match self.tokens.get(start + 1) {
- Some(&Token::Literal('.')) => {}
+ match *self.tokens.get(start + 1)? {
+ Token::Literal('.') => {}
_ => return None,
}
let mut lit = ".".to_string();
@@ -405,8 +396,8 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let (end, need_sep) = match self.tokens.last() {
- Some(&Token::ZeroOrMore) => {
+ let (end, need_sep) = match *self.tokens.last()? {
+ Token::ZeroOrMore => {
if self.opts.literal_separator {
// If a trailing `*` can't match a `/`, then we can't
// assume a match of the prefix corresponds to a match
@@ -418,15 +409,13 @@ impl Glob {
}
(self.tokens.len() - 1, false)
}
- Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
+ Token::RecursiveSuffix => (self.tokens.len() - 1, true),
_ => (self.tokens.len(), false),
};
let mut lit = String::new();
for t in &self.tokens[0..end] {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if need_sep {
lit.push('/');
@@ -455,8 +444,8 @@ impl Glob {
return None;
}
let mut lit = String::new();
- let (start, entire) = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => {
+ let (start, entire) = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => {
// We only care if this follows a path component if the next
// token is a literal.
if let Some(&Token::Literal(_)) = self.tokens.get(1) {
@@ -468,8 +457,8 @@ impl Glob {
}
_ => (0, false),
};
- let start = match self.tokens.get(start) {
- Some(&Token::ZeroOrMore) => {
+ let start = match *self.tokens.get(start)? {
+ Token::ZeroOrMore => {
// If literal_separator is enabled, then a `*` can't
// necessarily match everything, so reporting a suffix match
// as a match of the pattern would be a false positive.
@@ -481,10 +470,8 @@ impl Glob {
_ => start,
};
for t in &self.tokens[start..] {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
if lit.is_empty() || lit == "/" {
None
@@ -508,8 +495,8 @@ impl Glob {
if self.opts.case_insensitive {
return None;
}
- let start = match self.tokens.get(0) {
- Some(&Token::RecursivePrefix) => 1,
+ let start = match *self.tokens.get(0)? {
+ Token::RecursivePrefix => 1,
_ => {
// With nothing to gobble up the parent portion of a path,
// we can't assume that matching on only the basename is
@@ -520,7 +507,7 @@ impl Glob {
if self.tokens[start..].is_empty() {
return None;
}
- for t in &self.tokens[start..] {
+ for t in self.tokens[start..].iter() {
match *t {
Token::Literal('/') => return None,
Token::Literal(_) => {} // OK
@@ -554,16 +541,11 @@ impl Glob {
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
/// does not contain a path separator.
fn basename_literal(&self) -> Option<String> {
- let tokens = match self.basename_tokens() {
- None => return None,
- Some(tokens) => tokens,
- };
+ let tokens = self.basename_tokens()?;
let mut lit = String::new();
for t in tokens {
- match *t {
- Token::Literal(c) => lit.push(c),
- _ => return None,
- }
+ let Token::Literal(c) = *t else { return None };
+ lit.push(c);
}
Some(lit)
}
@@ -574,7 +556,7 @@ impl<'a> GlobBuilder<'a> {
///
/// The pattern is not compiled until `build` is called.
pub fn new(glob: &'a str) -> GlobBuilder<'a> {
- GlobBuilder { glob: glob, opts: GlobOptions::default() }
+ GlobBuilder { glob, opts: GlobOptions::default() }
}
/// Parses and builds the pattern.
@@ -604,7 +586,7 @@ impl<'a> GlobBuilder<'a> {
glob: self.glob.to_string(),
re: tokens.to_regex_with(&self.opts),
opts: self.opts,
- tokens: tokens,
+ tokens,
})
}
}
@@ -640,7 +622,8 @@ impl<'a> GlobBuilder<'a> {
/// Toggle whether an empty pattern in a list of alternates is accepted.
///
- /// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`.
+ /// For example, if this is set then the glob `foo{,.txt}` will match both
+ /// `foo` and `foo.txt`.
///
/// By default this is false.
pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
@@ -678,7 +661,7 @@ impl Tokens {
tokens: &[Token],
re: &mut String,
) {
- for tok in tokens {
+ for tok in tokens.iter() {
match *tok {
Token::Literal(c) => {
re.push_str(&char_to_escaped_literal(c));
@@ -758,7 +741,9 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
let mut s = String::with_capacity(bs.len());
for &b in bs {
if b <= 0x7F {
- s.push_str(&regex::escape(&(b as char).to_string()));
+ s.push_str(&regex_syntax::escape(
+ char::from(b).encode_utf8(&mut [0; 4]),
+ ));
} else {
s.push_str(&format!("\\x{:02x}", b));
}
@@ -769,7 +754,7 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
struct Parser<'a> {
glob: &'a str,
stack: Vec<Tokens>,
- chars: iter::Peekable<str::Chars<'a>>,
+ chars: std::iter::Peekable<std::str::Chars<'a>>,
prev: Option<char>,
cur: Option<char>,
opts: &'a GlobOptions,
@@ -777,7 +762,7 @@ struct Parser<'a> {
impl<'a> Parser<'a> {
fn error(&self, kind: ErrorKind) -> Error {
- Error { glob: Some(self.glob.to_string()), kind: kind }
+ Error { glob: Some(self.glob.to_string()), kind }
}
fn parse(&mut self) -> Result<(), Error> {
@@ -996,7 +981,7 @@ impl<'a> Parser<'a> {
// it as a literal.
ranges.push(('-', '-'));
}
- self.push_token(Token::Class { negated: negated, ranges: ranges })
+ self.push_token(Token::Class { negated, ranges })
}
fn bump(&mut self) -> Option<char> {
diff --git a/crates/globset/src/lib.rs b/crates/globset/src/lib.rs
index 7a357489..15eeefbb 100644
--- a/crates/globset/src/lib.rs
+++ b/crates/globset/src/lib.rs
@@ -5,11 +5,9 @@ Glob set matching is the process of matching one or more glob patterns against
a single candidate path simultaneously, and returning all of the globs that
matched. For example, given this set of globs:
-```ignore
-*.rs
-src/lib.rs
-src/**/foo.rs
-```
+* `*.rs`
+* `src/lib.rs`
+* `src/**/foo.rs`
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
globs as matching.
@@ -19,7 +17,6 @@ globs as matching.
This example shows how to match a single glob against a single file path.
```
-# fn example() -> Result<(), globset::Error> {
use globset::Glob;
let glob = Glob::new("*.rs")?.compile_matcher();
@@ -27,7 +24,7 @@ let glob = Glob::new("*.rs")?.compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(glob.is_match("foo/bar.rs"));
assert!(!glob.is_match("Cargo.toml"));
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Example: configuring a glob matcher
@@ -36,7 +33,6 @@ This example shows how to use a `GlobBuilder` to configure aspects of match
semantics. In this example, we prevent wildcards from matching path separators.
```
-# fn example() -> Result<(), globset::Error> {
use globset::GlobBuilder;
let glob = GlobBuilder::new("*.rs")
@@ -45,7 +41,7 @@ let glob = GlobBuilder::new("*.rs")
assert!(glob.is_match("foo.rs"));
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
assert!(!glob.is_match("Cargo.toml"));
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Example: match multiple globs at once
@@ -53,7 +49,6 @@ assert!(!glob.is_match("Cargo.toml"));
This example shows how to match multiple glob patterns at once.
```
-# fn example() -> Result<(), globset::Error> {
use globset::{Glob, GlobSetBuilder};
let mut builder = GlobSetBuilder::new();
@@ -65,7 +60,7 @@ builder.add(Glob::new("src/**/foo.rs")?);
let set = builder.build()?;
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
-# Ok(()) } example().unwrap();
+# Ok::<(), Box<dyn std::error::Error>>(())
```
# Syntax
@@ -103,22 +98,22 @@ or to enable case insensitive matching.
#![deny(missing_docs)]
-use std::borrow::Cow;
-use std::collections::{BTreeMap, HashMap};
-use std::error::Error as StdError;
-use std::fmt;
-use std::hash;
-use std::path::Path;
-use std::str;
+use std::{borrow::Cow, path::Path};
+
+use {
+ aho_corasick::AhoCorasick,
+ bstr::{ByteSlice, ByteVec, B},
+ regex_automata::meta::Regex,
+};
-use aho_corasick::AhoCorasick;
-use bstr::{ByteSlice, ByteVec, B};
-use regex::bytes::{Regex, RegexBuilder, RegexSet};
+use crate::{
+ glob::MatchStrategy,
+ pathutil::{file_name, file_name_ext, normalize_path},
+};
-use crate::glob::MatchStrategy;
pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
-use crate::pathutil::{file_name, file_name_ext, normalize_path};
+mod fnv;
mod glob;
mod pathutil;
@@ -181,7 +176,7 @@ pub enum ErrorKind {
__Nonexhaustive,
}
-impl StdError for Error {
+impl std::error::Error for Error {
fn description(&self) -> &str {
self.kind.description()
}
@@ -227,8 +222,8 @@ impl ErrorKind {
}
}
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.glob {
None => self.kind.fmt(f),
Some(ref glob) => {
@@ -238,8 +233,8 @@ impl fmt::Display for Error {
}
}
-impl fmt::Display for ErrorKind {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for ErrorKind {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
ErrorKind::InvalidRecursive
| ErrorKind::UnclosedClass
@@ -257,30 +252,40 @@ impl fmt::Display for ErrorKind {
}
fn new_regex(pat: &str) -> Result<Regex, Error> {
- RegexBuilder::new(pat)
- .dot_matches_new_line(true)
- .size_limit(10 * (1 << 20))
- .dfa_size_limit(10 * (1 << 20))
- .build()
- .map_err(|err| Error {
+ let syntax = regex_automata::util::syntax::Config::new()
+ .utf8(false)
+ .dot_matches_new_line(true);
+ let config = Regex::config()
+ .utf8_empty(false)
+ .nfa_size_limit(Some(10 * (1 << 20)))
+ .hybrid_cache_capacity(10 * (1 << 20));
+ Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
+ |err| Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
+ },
+ )
+}
+
+fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
+ let syntax = regex_automata::util::syntax::Config::new()
+ .utf8(false)
+ .dot_matches_new_line(true);
+ let config = Regex::config()
+ .match_kind(regex_automata::MatchKind::All)
+ .utf8_empty(false)
+ .nfa_size_limit(Some(10 * (1 << 20)))
+ .hybrid_cache_capacity(10 * (1 << 20));
+ Regex::builder()
+ .syntax(syntax)
+ .configure(config)
+ .build_many(&pats)
+ .map_err(|err| Error {
+ glob: None,
+ kind: ErrorKind::Regex(err.to_string()),
})
}
-fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
-where
- S: AsRef<str>,
- I: IntoIterator<Item = S>,
-{
- RegexSet::new(pats).map_err(|err| Error {
- glob: None,
- kind: ErrorKind::Regex(err.to_string()),
- })
-}
-
-type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
-
/// GlobSet represents a group of globs that can be matched together in a
/// single pass.
#[derive(Clone, Debug)]
@@ -521,7 +526,7 @@ impl<'a> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
- Candidate { path: path, basename: basename, ext: ext }
+ Candidate { path, basename, ext }
}
fn path_prefix(&self, max: usize) -> &[u8] {
@@ -585,11 +590,11 @@ impl GlobSetMatchStrategy {
}
#[derive(Clone, Debug)]
-struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl LiteralStrategy {
fn new() -> LiteralStrategy {
- LiteralStrategy(BTreeMap::new())
+ LiteralStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, lit: String) {
@@ -613,11 +618,11 @@ impl LiteralStrategy {
}
#[derive(Clone, Debug)]
-struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
+struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl BasenameLiteralStrategy {
fn new() -> BasenameLiteralStrategy {
- BasenameLiteralStrategy(BTreeMap::new())
+ BasenameLiteralStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, lit: String) {
@@ -647,11 +652,11 @@ impl BasenameLiteralStrategy {
}
#[derive(Clone, Debug)]
-struct ExtensionStrategy(HashMap<Vec<u8>, Vec<usize>, Fnv>);
+struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
impl ExtensionStrategy {
fn new() -> ExtensionStrategy {
- ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
+ ExtensionStrategy(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, ext: String) {
@@ -745,7 +750,7 @@ impl SuffixStrategy {
}
#[derive(Clone, Debug)]
-struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
+struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
impl RequiredExtensionStrategy {
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
@@ -786,8 +791,9 @@ impl RequiredExtensionStrategy {
#[derive(Clone, Debug)]
struct RegexSetStrategy {
- matcher: RegexSet,
+ matcher: Regex,
map: Vec<usize>,
+ // patset: regex_automata::PatternSet,
}
impl RegexSetStrategy {
@@ -800,7 +806,11 @@ impl RegexSetStrategy {
candidate: &Candidate<'_>,
matches: &mut Vec<usize>,
) {
- for i in self.matcher.matches(candidate.path.as_bytes()) {
+ let input = regex_automata::Input::new(candidate.path.as_bytes());
+ let mut patset =
+ regex_automata::PatternSet::new(self.matcher.pattern_len());
+ self.matcher.which_overlapping_matches(&input, &mut patset);
+ for i in patset.iter() {
matches.push(self.map[i]);
}
}
@@ -852,12 +862,12 @@ impl MultiStrategyBuilder {
#[derive(Clone, Debug)]
struct RequiredExtensionStrategyBuilder(
- HashMap<Vec<u8>, Vec<(usize, String)>>,
+ fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
);
impl RequiredExtensionStrategyBuilder {
fn new() -> RequiredExtensionStrategyBuilder {
- RequiredExtensionStrategyBuilder(HashMap::new())
+ RequiredExtensionStrategyBuilder(fnv::HashMap::default())
}
fn add(&mut self, global_index: usize, ext: String, regex: String) {
@@ -868,7 +878,7 @@ impl RequiredExtensionStrategyBuilder {
}
fn build(self) -> Result<RequiredExtensionStrategy, Error> {
- let mut exts = HashMap::with_hasher(Fnv::default());
+ let mut exts = fnv::HashMap::default();
for (ext, regexes) in self.0.into_iter() {
exts.insert(ext.clone(), vec![]);
for (global_index, regex) in regexes {
diff --git a/crates/globset/src/pathutil.rs b/crates/globset/src/pathutil.rs
index 522df340..8488e74f 100644
--- a/crates/globset/src/pathutil.rs
+++ b/crates/globset/src/pathutil.rs
@@ -4,12 +4,10 @@ use bstr::{ByteSlice, ByteVec};
/// The final component of the path, if it is a normal file.
///
-/// If the path terminates in ., .., or consists solely of a root of prefix,
-/// file_name will return None.
-pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
- if path.is_empty() {
- return None;
- } else if path.last_byte() == Some(b'.') {
+/// If the path terminates in `.`, `..`, or consists solely of a root or
+/// prefix, file_name will return None.
+pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+ if path.last_byte().map_or(true, |b| b == b'.') {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
@@ -39,7 +37,9 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
/// extension, but it also matches files like `.rs`, which doesn't have an
/// extension according to std::path::Path::extension.
-pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
+pub(crate) fn file_name_ext<'a>(
+ name: &Cow<'a, [u8]>,
+) -> Option<Cow<'a, [u8]>> {
if name.is_empty() {
return None;
}
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(unix)]
-pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
+pub(crate) fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
// UNIX only uses /, so we're good.
path
}
@@ -68,11 +68,11 @@ pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(not(unix))]
-pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
+pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
use std::path::is_separator;
for i in 0..path.len() {
- if path[i] == b'/' || !is_separator(path[i] as char) {
+ if path[i] == b'/' || !is_separator(char::from(path[i])) {
continue;
}
path.to_mut()[i] = b'/';