summary refs log tree commit diff stats
path: root/globset
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2018-07-21 13:23:46 -0400
committer Andrew Gallant <jamslam@gmail.com> 2018-07-21 13:23:46 -0400
commit d09e2f6af1821c4d6fcdcfb7278e57c76c1f5c76 (patch)
tree 372878f2c6e9392bf4b3cb75c5de93006cc5c992 /globset
parent 7b6af5a177b90bc5002395577a6eb35f8c8b8931 (diff)
globset: clarify documentation on regex method
This makes it clear that the `bytes` API of the regex crate should be used instead of the Unicode API. Fixes #985.
Diffstat (limited to 'globset')
-rw-r--r-- globset/src/glob.rs 13
1 files changed, 13 insertions, 0 deletions
diff --git a/globset/src/glob.rs b/globset/src/glob.rs
index 062f5a64..cbbc7bad 100644
--- a/globset/src/glob.rs
+++ b/globset/src/glob.rs
@@ -275,6 +275,19 @@ impl Glob {
}
/// Returns the regular expression string for this glob.
+ ///
+ /// Note that regular expressions for globs are intended to be matched on
+ /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
+ /// particular, globs are frequently used on file paths, where there is no
+ /// general guarantee that file paths are themselves valid UTF-8. As a
+ /// result, callers will need to ensure that they are using a regex API
+ /// that can match on arbitrary bytes. For example, the
+ /// [`regex`](https://crates.io/crates/regex)
+ /// crate's
+ /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
+ /// API is not suitable for this since it matches on `&str`, but its
+ /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
+ /// API is suitable for this.
pub fn regex(&self) -> &str {
&self.re
}