summary refs log tree commit diff stats
path: root/globset
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com>, 2019-01-23 19:46:15 -0500
committer: Andrew Gallant <jamslam@gmail.com>, 2019-01-23 19:59:39 -0500
commit: 9c940b45f4f81b51eee7dcf5836fb3680cf6b62c (patch)
tree: 8a8814010eeeb6c69b9b248764bb19ee23001c57 /globset
parent: 0a167021c36f6946fc586f4c76e66ab627f4cc04 (diff)
globset: permit ** to appear anywhere
Previously, `man gitignore` specified that `**` was invalid unless it was used in one of a few specific circumstances, i.e., `**`, `a/**`, `**/b` or `a/**/b`. That is, `**` always had to be surrounded by either a path separator or the beginning/end of the pattern. It turns out that git itself has treated `**` outside the above contexts as valid for quite a while, so there was an inconsistency between the spec (`man gitignore`) and the implementation, and it wasn't clear which was actually correct. @okdana filed a bug against git [1] and got this fixed. The spec was wrong, which has now been fixed [2] and updated [3]. This commit brings ripgrep in line with git and treats `**` outside of the above contexts as two consecutive `*` patterns. We deprecate the `InvalidRecursive` error since it is no longer used. Fixes #373, Fixes #1098. [1] - https://public-inbox.org/git/C16A9F17-0375-42F9-90A9-A92C9F3D8BBA@dana.is ; [2] - https://github.com/git/git/commit/627186d0206dcb219c43f8e6670b4487802a4921 ; [3] - https://git-scm.com/docs/gitignore
Diffstat (limited to 'globset')
-rw-r--r-- globset/src/glob.rs 49
-rw-r--r-- globset/src/lib.rs 9
2 files changed, 38 insertions, 20 deletions
diff --git a/globset/src/glob.rs b/globset/src/glob.rs
index 53d44e15..eccfb2d3 100644
--- a/globset/src/glob.rs
+++ b/globset/src/glob.rs
@@ -837,40 +837,49 @@ impl<'a> Parser<'a> {
fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev;
- if self.chars.peek() != Some(&'*') {
+ if self.peek() != Some('*') {
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
assert!(self.bump() == Some('*'));
if !self.have_tokens()? {
- self.push_token(Token::RecursivePrefix)?;
- let next = self.bump();
- if !next.map(is_separator).unwrap_or(true) {
- return Err(self.error(ErrorKind::InvalidRecursive));
+ if !self.peek().map_or(true, is_separator) {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ } else {
+ self.push_token(Token::RecursivePrefix)?;
+ assert!(self.bump().map_or(true, is_separator));
}
return Ok(());
}
if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
- || (prev != Some(',') && prev != Some('{')) {
- return Err(self.error(ErrorKind::InvalidRecursive));
+ || (prev != Some(',') && prev != Some('{'))
+ {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ return Ok(());
}
}
let is_suffix =
- match self.chars.peek() {
+ match self.peek() {
None => {
assert!(self.bump().is_none());
true
}
- Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
+ Some(',') | Some('}') if self.stack.len() >= 2 => {
true
}
- Some(&c) if is_separator(c) => {
+ Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
}
- _ => return Err(self.error(ErrorKind::InvalidRecursive)),
+ _ => {
+ self.push_token(Token::ZeroOrMore)?;
+ self.push_token(Token::ZeroOrMore)?;
+ return Ok(());
+ }
};
match self.pop_token()? {
Token::RecursivePrefix => {
@@ -976,6 +985,10 @@ impl<'a> Parser<'a> {
self.cur = self.chars.next();
self.cur
}
+
+ fn peek(&mut self) -> Option<char> {
+ self.chars.peek().map(|&ch| ch)
+ }
}
#[cfg(test)]
@@ -1161,13 +1174,6 @@ mod tests {
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
- syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
- syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
@@ -1228,6 +1234,13 @@ mod tests {
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
+ toregex!(re28, "a**", r"^a.*.*$");
+ toregex!(re29, "**a", r"^.*.*a$");
+ toregex!(re30, "a**b", r"^a.*.*b$");
+ toregex!(re31, "***", r"^.*.*.*$");
+ toregex!(re32, "/a**", r"^/a.*.*$");
+ toregex!(re33, "/**a", r"^/.*.*a$");
+ toregex!(re34, "/a**b", r"^/a.*.*b$");
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
diff --git a/globset/src/lib.rs b/globset/src/lib.rs
index 8d26e187..7196b8f2 100644
--- a/globset/src/lib.rs
+++ b/globset/src/lib.rs
@@ -143,8 +143,13 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
- /// Occurs when a use of `**` is invalid. Namely, `**` can only appear
- /// adjacent to a path separator, or the beginning/end of a glob.
+ /// **DEPRECATED**.
+ ///
+ /// This error used to occur for consistency with git's glob specification,
+ /// but the specification now accepts all uses of `**`. When `**` does not
+ /// appear adjacent to a path separator or at the beginning/end of a glob,
+ /// it is now treated as two consecutive `*` patterns. As such, this error
+ /// is no longer used.
InvalidRecursive,
/// Occurs when a character class (e.g., `[abc]`) is not closed.
UnclosedClass,