summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Brian Malehorn <bmalehorn@gmail.com> 2018-02-22 23:13:36 -0800
committer Andrew Gallant <jamslam@gmail.com> 2018-03-10 09:30:55 -0500
commit e2516ed0957b10c0a2d49e5cc402c44f04f30a43 (patch)
tree dde638a40b7cba7d8c8820caaa0180ef36c65a59
parent c0c80e0209d395e1fbaa1fc8e2734bd387e89f05 (diff)
globset: support backslash escaping
From `man 7 glob`: "One can remove the special meaning of '?', '*' and '[' by preceding them by a backslash, or, in case this is part of a shell command line, enclosing them in quotes." Conform to glob / fnmatch / git implementations by making `\` escape the following character - for example, `\?` will match a literal `?`. However, only enable this by default on Unix platforms. Windows builds will continue to use `\` as a path separator, but can still get the new behavior by calling `GlobBuilder::backslash_escape(true)`. Adding tests for the `GlobBuilder::backslash_escape` option was a bit involved, since the default value of this option is platform-dependent. Extend the test options framework to hold an `Option<T>` for each knob, where `None` means "default" and `Some(v)` means "override with `v`". This way we only have to specify the default values once in `GlobOptions::default()` rather than replicated in both code and tests. Finally, write a few behavioral tests, and some tests to confirm it varies by platform.
-rw-r--r-- globset/src/glob.rs 155
-rw-r--r-- globset/src/lib.rs 11
2 files changed, 131 insertions, 35 deletions
diff --git a/globset/src/glob.rs b/globset/src/glob.rs
index 0bdb9b45..062f5a64 100644
--- a/globset/src/glob.rs
+++ b/globset/src/glob.rs
@@ -187,13 +187,26 @@ pub struct GlobBuilder<'a> {
opts: GlobOptions,
}
-#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
/// Whether to match case insensitively.
case_insensitive: bool,
/// Whether to require a literal separator to match a separator in a file
/// path. e.g., when enabled, `*` won't match `/`.
literal_separator: bool,
+ /// Whether or not to use `\` to escape special characters.
+ /// e.g., when enabled, `\*` will match a literal `*`.
+ backslash_escape: bool,
+}
+
+impl GlobOptions {
+ fn default() -> GlobOptions {
+ GlobOptions {
+ case_insensitive: false,
+ literal_separator: false,
+ backslash_escape: !is_separator('\\'),
+ }
+ }
}
#[derive(Clone, Debug, Default, Eq, PartialEq)]
@@ -549,6 +562,7 @@ impl<'a> GlobBuilder<'a> {
chars: self.glob.chars().peekable(),
prev: None,
cur: None,
+ opts: &self.opts,
};
p.parse()?;
if p.stack.is_empty() {
@@ -585,6 +599,19 @@ impl<'a> GlobBuilder<'a> {
self.opts.literal_separator = yes;
self
}
+
+ /// When enabled, a backslash (`\`) may be used to escape
+ /// special characters in a glob pattern. Additionally, this will
+ /// prevent `\` from being interpreted as a path separator on all
+ /// platforms.
+ ///
+ /// This is enabled by default on platforms where `\` is not a
+ /// path separator and disabled by default on platforms where `\`
+ /// is a path separator.
+ pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
+ self.opts.backslash_escape = yes;
+ self
+ }
}
impl Tokens {
@@ -710,6 +737,7 @@ struct Parser<'a> {
chars: iter::Peekable<str::Chars<'a>>,
prev: Option<char>,
cur: Option<char>,
+ opts: &'a GlobOptions,
}
impl<'a> Parser<'a> {
@@ -726,14 +754,8 @@ impl<'a> Parser<'a> {
'{' => self.push_alternate()?,
'}' => self.pop_alternate()?,
',' => self.parse_comma()?,
- c => {
- if is_separator(c) {
- // Normalize all patterns to use / as a separator.
- self.push_token(Token::Literal('/'))?
- } else {
- self.push_token(Token::Literal(c))?
- }
- }
+ '\\' => self.parse_backslash()?,
+ c => self.push_token(Token::Literal(c))?,
}
}
Ok(())
@@ -786,6 +808,20 @@ impl<'a> Parser<'a> {
}
}
+ fn parse_backslash(&mut self) -> Result<(), Error> {
+ if self.opts.backslash_escape {
+ match self.bump() {
+ None => Err(self.error(ErrorKind::DanglingEscape)),
+ Some(c) => self.push_token(Token::Literal(c)),
+ }
+ } else if is_separator('\\') {
+ // Normalize all patterns to use / as a separator.
+ self.push_token(Token::Literal('/'))
+ } else {
+ self.push_token(Token::Literal('\\'))
+ }
+ }
+
fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev;
if self.chars.peek() != Some(&'*') {
@@ -933,8 +969,9 @@ mod tests {
#[derive(Clone, Copy, Debug, Default)]
struct Options {
- casei: bool,
- litsep: bool,
+ casei: Option<bool>,
+ litsep: Option<bool>,
+ bsesc: Option<bool>,
}
macro_rules! syntax {
@@ -964,11 +1001,17 @@ mod tests {
($name:ident, $pat:expr, $re:expr, $options:expr) => {
#[test]
fn $name() {
- let pat = GlobBuilder::new($pat)
- .case_insensitive($options.casei)
- .literal_separator($options.litsep)
- .build()
- .unwrap();
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
assert_eq!(format!("(?-u){}", $re), pat.regex());
}
};
@@ -981,11 +1024,17 @@ mod tests {
($name:ident, $pat:expr, $path:expr, $options:expr) => {
#[test]
fn $name() {
- let pat = GlobBuilder::new($pat)
- .case_insensitive($options.casei)
- .literal_separator($options.litsep)
- .build()
- .unwrap();
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
let matcher = pat.compile_matcher();
let strategic = pat.compile_strategic_matcher();
let set = GlobSetBuilder::new().add(pat).build().unwrap();
@@ -1003,11 +1052,17 @@ mod tests {
($name:ident, $pat:expr, $path:expr, $options:expr) => {
#[test]
fn $name() {
- let pat = GlobBuilder::new($pat)
- .case_insensitive($options.casei)
- .literal_separator($options.litsep)
- .build()
- .unwrap();
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
let matcher = pat.compile_matcher();
let strategic = pat.compile_strategic_matcher();
let set = GlobSetBuilder::new().add(pat).build().unwrap();
@@ -1091,12 +1146,24 @@ mod tests {
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
const CASEI: Options = Options {
- casei: true,
- litsep: false,
+ casei: Some(true),
+ litsep: None,
+ bsesc: None,
};
const SLASHLIT: Options = Options {
- casei: false,
- litsep: true,
+ casei: None,
+ litsep: Some(true),
+ bsesc: None,
+ };
+ const NOBSESC: Options = Options {
+ casei: None,
+ litsep: None,
+ bsesc: Some(false),
+ };
+ const BSESC: Options = Options {
+ casei: None,
+ litsep: None,
+ bsesc: Some(true),
};
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@@ -1209,6 +1276,17 @@ mod tests {
#[cfg(not(unix))]
matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
+ matches!(matchbackslash1, "\\[", "[", BSESC);
+ matches!(matchbackslash2, "\\?", "?", BSESC);
+ matches!(matchbackslash3, "\\*", "*", BSESC);
+ matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
+ matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
+ matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
+ #[cfg(unix)]
+ matches!(matchbackslash7, "\\a", "a");
+ #[cfg(not(unix))]
+ matches!(matchbackslash8, "\\a", "/a");
+
nmatches!(matchnot1, "a*b*c", "abcd");
nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
@@ -1253,13 +1331,20 @@ mod tests {
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
extract!($which, $name, $pat, $expect, Options::default());
};
- ($which:ident, $name:ident, $pat:expr, $expect:expr, $opts:expr) => {
+ ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
#[test]
fn $name() {
- let pat = GlobBuilder::new($pat)
- .case_insensitive($opts.casei)
- .literal_separator($opts.litsep)
- .build().unwrap();
+ let mut builder = GlobBuilder::new($pat);
+ if let Some(casei) = $options.casei {
+ builder.case_insensitive(casei);
+ }
+ if let Some(litsep) = $options.litsep {
+ builder.literal_separator(litsep);
+ }
+ if let Some(bsesc) = $options.bsesc {
+ builder.backslash_escape(bsesc);
+ }
+ let pat = builder.build().unwrap();
assert_eq!($expect, pat.$which());
}
};
diff --git a/globset/src/lib.rs b/globset/src/lib.rs
index af11dff7..fb95ce75 100644
--- a/globset/src/lib.rs
+++ b/globset/src/lib.rs
@@ -91,6 +91,11 @@ Standard Unix-style glob syntax is supported:
`[!ab]` to match any character except for `a` and `b`.
* Metacharacters such as `*` and `?` can be escaped with character class
notation. e.g., `[*]` matches `*`.
+* When backslash escapes are enabled, a backslash (`\`) will escape all meta
+ characters in a glob. If it precedes a non-meta character, then the backslash
+ is ignored. A `\\` will match a literal `\`. Note that this mode is only
+ enabled on Unix platforms by default, but can be enabled on any platform
+ via the `backslash_escape` setting on `GlobBuilder`.
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching.
@@ -154,6 +159,8 @@ pub enum ErrorKind {
/// Occurs when an alternating group is nested inside another alternating
/// group, e.g., `{{a,b},{c,d}}`.
NestedAlternates,
+ /// Occurs when an unescaped '\' is found at the end of a glob.
+ DanglingEscape,
/// An error associated with parsing or compiling a regex.
Regex(String),
}
@@ -199,6 +206,9 @@ impl ErrorKind {
ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed"
}
+ ErrorKind::DanglingEscape => {
+ "dangling '\\'"
+ }
ErrorKind::Regex(ref err) => err,
}
}
@@ -223,6 +233,7 @@ impl fmt::Display for ErrorKind {
| ErrorKind::UnopenedAlternates
| ErrorKind::UnclosedAlternates
| ErrorKind::NestedAlternates
+ | ErrorKind::DanglingEscape
| ErrorKind::Regex(_) => {
write!(f, "{}", self.description())
}