summaryrefslogtreecommitdiffstats
path: root/src/gitignore.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/gitignore.rs')
-rw-r--r--src/gitignore.rs455
1 files changed, 0 insertions, 455 deletions
diff --git a/src/gitignore.rs b/src/gitignore.rs
deleted file mode 100644
index 9daeb3cb..00000000
--- a/src/gitignore.rs
+++ /dev/null
@@ -1,455 +0,0 @@
-/*!
-The gitignore module provides a way of reading a gitignore file and applying
-it to a particular file name to determine whether it should be ignore or not.
-The motivation for this submodule is performance and portability:
-
-1. There is a gitignore crate on crates.io, but it uses the standard `glob`
- crate and checks patterns one-by-one. This is a reasonable implementation,
- but not suitable for the performance we need here.
-2. We could shell out to a `git` sub-command like ls-files or status, but it
- seems better to not rely on the existence of external programs for a search
- tool. Besides, we need to implement this logic anyway to support things like
- an .ignore file.
-
-The key implementation detail here is that a single gitignore file is compiled
-into a single RegexSet, which can be used to report which globs match a
-particular file name. We can then do a quick post-processing step to implement
-additional rules such as whitelists (prefix of `!`) or directory-only globs
-(suffix of `/`).
-*/
-
-// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
-// use this exact implementation because hgignore files are different.
-
-use std::cell::RefCell;
-use std::error::Error as StdError;
-use std::fmt;
-use std::fs::File;
-use std::io::{self, BufRead};
-use std::path::{Path, PathBuf};
-use std::sync::Arc;
-
-use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
-use regex;
-use thread_local::ThreadLocal;
-
-use pathutil::{is_file_name, strip_prefix};
-
-/// Represents an error that can occur when parsing a gitignore file.
-#[derive(Debug)]
-pub enum Error {
- Glob(globset::Error),
- Regex(regex::Error),
- Io(io::Error),
-}
-
-impl StdError for Error {
- fn description(&self) -> &str {
- match *self {
- Error::Glob(ref err) => err.description(),
- Error::Regex(ref err) => err.description(),
- Error::Io(ref err) => err.description(),
- }
- }
-}
-
-impl fmt::Display for Error {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Error::Glob(ref err) => err.fmt(f),
- Error::Regex(ref err) => err.fmt(f),
- Error::Io(ref err) => err.fmt(f),
- }
- }
-}
-
-impl From<globset::Error> for Error {
- fn from(err: globset::Error) -> Error {
- Error::Glob(err)
- }
-}
-
-impl From<regex::Error> for Error {
- fn from(err: regex::Error) -> Error {
- Error::Regex(err)
- }
-}
-
-impl From<io::Error> for Error {
- fn from(err: io::Error) -> Error {
- Error::Io(err)
- }
-}
-
-/// Gitignore is a matcher for the glob patterns in a single gitignore file.
-#[derive(Clone, Debug)]
-pub struct Gitignore {
- set: GlobSet,
- root: PathBuf,
- patterns: Vec<Pattern>,
- num_ignores: u64,
- num_whitelist: u64,
- matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
-}
-
-impl Gitignore {
- /// Create a new gitignore glob matcher from the given root directory and
- /// string containing the contents of a gitignore file.
- #[allow(dead_code)]
- fn from_str<P: AsRef<Path>>(
- root: P,
- gitignore: &str,
- ) -> Result<Gitignore, Error> {
- let mut builder = GitignoreBuilder::new(root);
- try!(builder.add_str(gitignore));
- builder.build()
- }
-
- /// Returns true if and only if the given file path should be ignored
- /// according to the globs in this gitignore. `is_dir` should be true if
- /// the path refers to a directory and false otherwise.
- ///
- /// Before matching path, its prefix (as determined by a common suffix
- /// of the directory containing this gitignore) is stripped. If there is
- /// no common suffix/prefix overlap, then path is assumed to reside in the
- /// same directory as this gitignore file.
- pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
- let mut path = path.as_ref();
- if let Some(p) = strip_prefix("./", path) {
- path = p;
- }
- // Strip any common prefix between the candidate path and the root
- // of the gitignore, to make sure we get relative matching right.
- // BUT, a file name might not have any directory components to it,
- // in which case, we don't want to accidentally strip any part of the
- // file name.
- if !is_file_name(path) {
- if let Some(p) = strip_prefix(&self.root, path) {
- path = p;
- }
- }
- if let Some(p) = strip_prefix("/", path) {
- path = p;
- }
- self.matched_stripped(path, is_dir)
- }
-
- /// Like matched, but takes a path that has already been stripped.
- pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
- let _matches = self.matches.get_default();
- let mut matches = _matches.borrow_mut();
- let candidate = Candidate::new(path);
- self.set.matches_candidate_into(&candidate, &mut *matches);
- for &i in matches.iter().rev() {
- let pat = &self.patterns[i];
- if !pat.only_dir || is_dir {
- return if pat.whitelist {
- Match::Whitelist(pat)
- } else {
- Match::Ignored(pat)
- };
- }
- }
- Match::None
- }
-
- /// Returns the total number of ignore patterns.
- pub fn num_ignores(&self) -> u64 {
- self.num_ignores
- }
-}
-
-/// The result of a glob match.
-///
-/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
-/// a match (whether ignored or whitelisted).
-#[derive(Clone, Debug)]
-pub enum Match<'a> {
- /// The path didn't match any glob in the gitignore file.
- None,
- /// The last glob matched indicates the path should be ignored.
- Ignored(&'a Pattern),
- /// The last glob matched indicates the path should be whitelisted.
- Whitelist(&'a Pattern),
-}
-
-impl<'a> Match<'a> {
- /// Returns true if the match result implies the path should be ignored.
- #[allow(dead_code)]
- pub fn is_ignored(&self) -> bool {
- match *self {
- Match::Ignored(_) => true,
- Match::None | Match::Whitelist(_) => false,
- }
- }
-
- /// Returns true if the match result didn't match any globs.
- pub fn is_none(&self) -> bool {
- match *self {
- Match::None => true,
- Match::Ignored(_) | Match::Whitelist(_) => false,
- }
- }
-
- /// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
- /// becomes Ignored. A non-match remains the same.
- pub fn invert(self) -> Match<'a> {
- match self {
- Match::None => Match::None,
- Match::Ignored(pat) => Match::Whitelist(pat),
- Match::Whitelist(pat) => Match::Ignored(pat),
- }
- }
-}
-
-/// GitignoreBuilder constructs a matcher for a single set of globs from a
-/// .gitignore file.
-pub struct GitignoreBuilder {
- builder: GlobSetBuilder,
- root: PathBuf,
- patterns: Vec<Pattern>,
-}
-
-/// Pattern represents a single pattern in a gitignore file. It doesn't
-/// know how to do glob matching directly, but it does store additional
-/// options on a pattern, such as whether it's whitelisted.
-#[derive(Clone, Debug)]
-pub struct Pattern {
- /// The file path that this pattern was extracted from (may be empty).
- pub from: PathBuf,
- /// The original glob pattern string.
- pub original: String,
- /// The actual glob pattern string used to convert to a regex.
- pub pat: String,
- /// Whether this is a whitelisted pattern or not.
- pub whitelist: bool,
- /// Whether this pattern should only match directories or not.
- pub only_dir: bool,
-}
-
-impl GitignoreBuilder {
- /// Create a new builder for a gitignore file.
- ///
- /// The path given should be the path at which the globs for this gitignore
- /// file should be matched.
- pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
- let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
- GitignoreBuilder {
- builder: GlobSetBuilder::new(),
- root: root.to_path_buf(),
- patterns: vec![],
- }
- }
-
- /// Builds a new matcher from the glob patterns added so far.
- ///
- /// Once a matcher is built, no new glob patterns can be added to it.
- pub fn build(self) -> Result<Gitignore, Error> {
- let nignores = self.patterns.iter().filter(|p| !p.whitelist).count();
- let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count();
- Ok(Gitignore {
- set: try!(self.builder.build()),
- root: self.root,
- patterns: self.patterns,
- num_ignores: nignores as u64,
- num_whitelist: nwhitelist as u64,
- matches: Arc::new(ThreadLocal::default()),
- })
- }
-
- /// Add each pattern line from the file path given.
- pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
- let rdr = io::BufReader::new(try!(File::open(&path)));
- debug!("gitignore: {}", path.as_ref().display());
- for (i, line) in rdr.lines().enumerate() {
- let line = match line {
- Ok(line) => line,
- Err(err) => {
- debug!("error reading line {} in {}: {}",
- i, path.as_ref().display(), err);
- continue;
- }
- };
- if let Err(err) = self.add(&path, &line) {
- debug!("error adding gitignore pattern: '{}': {}", line, err);
- }
- }
- Ok(())
- }
-
- /// Add each pattern line from the string given.
- pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
- for line in gitignore.lines() {
- try!(self.add("", line));
- }
- Ok(())
- }
-
- /// Add a line from a gitignore file to this builder.
- ///
- /// If the line could not be parsed as a glob, then an error is returned.
- pub fn add<P: AsRef<Path>>(
- &mut self,
- from: P,
- mut line: &str,
- ) -> Result<(), Error> {
- if line.starts_with("#") {
- return Ok(());
- }
- if !line.ends_with("\\ ") {
- line = line.trim_right();
- }
- if line.is_empty() {
- return Ok(());
- }
- let mut pat = Pattern {
- from: from.as_ref().to_path_buf(),
- original: line.to_string(),
- pat: String::new(),
- whitelist: false,
- only_dir: false,
- };
- let mut literal_separator = false;
- let has_slash = line.chars().any(|c| c == '/');
- let is_absolute = line.chars().nth(0).unwrap() == '/';
- if line.starts_with("\\!") || line.starts_with("\\#") {
- line = &line[1..];
- } else {
- if line.starts_with("!") {
- pat.whitelist = true;
- line = &line[1..];
- }
- if line.starts_with("/") {
- // `man gitignore` says that if a glob starts with a slash,
- // then the glob can only match the beginning of a path
- // (relative to the location of gitignore). We achieve this by
- // simply banning wildcards from matching /.
- literal_separator = true;
- line = &line[1..];
- }
- }
- // If it ends with a slash, then this should only match directories,
- // but the slash should otherwise not be used while globbing.
- if let Some((i, c)) = line.char_indices().rev().nth(0) {
- if c == '/' {
- pat.only_dir = true;
- line = &line[..i];
- }
- }
- // If there is a literal slash, then we note that so that globbing
- // doesn't let wildcards match slashes.
- pat.pat = line.to_string();
- if has_slash {
- literal_separator = true;
- }
- // If there was a leading slash, then this is a pattern that must
- // match the entire path name. Otherwise, we should let it match
- // anywhere, so use a **/ prefix.
- if !is_absolute {
- // ... but only if we don't already have a **/ prefix.
- if !pat.pat.starts_with("**/") {
- pat.pat = format!("**/{}", pat.pat);
- }
- }
- // If the pattern ends with `/**`, then we should only match everything
- // inside a directory, but not the directory itself. Standard globs
- // will match the directory. So we add `/*` to force the issue.
- if pat.pat.ends_with("/**") {
- pat.pat = format!("{}/*", pat.pat);
- }
- let parsed = try!(
- GlobBuilder::new(&pat.pat)
- .literal_separator(literal_separator)
- .build());
- self.builder.add(parsed);
- self.patterns.push(pat);
- Ok(())
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::Gitignore;
-
- macro_rules! ignored {
- ($name:ident, $root:expr, $gi:expr, $path:expr) => {
- ignored!($name, $root, $gi, $path, false);
- };
- ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
- #[test]
- fn $name() {
- let gi = Gitignore::from_str($root, $gi).unwrap();
- assert!(gi.matched($path, $is_dir).is_ignored());
- }
- };
- }
-
- macro_rules! not_ignored {
- ($name:ident, $root:expr, $gi:expr, $path:expr) => {
- not_ignored!($name, $root, $gi, $path, false);
- };
- ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
- #[test]
- fn $name() {
- let gi = Gitignore::from_str($root, $gi).unwrap();
- assert!(!gi.matched($path, $is_dir).is_ignored());
- }
- };
- }
-
- const ROOT: &'static str = "/home/foobar/rust/rg";
-
- ignored!(ig1, ROOT, "months", "months");
- ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
- ignored!(ig3, ROOT, "*.rs", "src/main.rs");
- ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
- ignored!(ig5, ROOT, "/*.c", "cat-file.c");
- ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
- ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
- ignored!(ig8, ROOT, "foo/", "foo", true);
- ignored!(ig9, ROOT, "**/foo", "foo");
- ignored!(ig10, ROOT, "**/foo", "src/foo");
- ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
- ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
- ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
- ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
- ignored!(ig15, ROOT, "abc/**", "abc/x");
- ignored!(ig16, ROOT, "abc/**", "abc/x/y");
- ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
- ignored!(ig18, ROOT, "a/**/b", "a/b");
- ignored!(ig19, ROOT, "a/**/b", "a/x/b");
- ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
- ignored!(ig21, ROOT, r"\!xy", "!xy");
- ignored!(ig22, ROOT, r"\#foo", "#foo");
- ignored!(ig23, ROOT, "foo", "./foo");
- ignored!(ig24, ROOT, "target", "grep/target");
- ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
- ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
- ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
- ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
- ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
- ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);
-
- not_ignored!(ignot1, ROOT, "amonths", "months");
- not_ignored!(ignot2, ROOT, "monthsa", "months");
- not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
- not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
- not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
- not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
- not_ignored!(ignot7, ROOT, "foo/", "foo", false);
- not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
- not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
- not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
- not_ignored!(ignot11, ROOT, "#foo", "#foo");
- not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
- not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
- not_ignored!(
- ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
- "./third_party/protobuf/csharp/src/packages/repositories.config");
-
- // See: https://github.com/BurntSushi/ripgrep/issues/106
- #[test]
- fn regression_106() {
- Gitignore::from_str("/", " ").unwrap();
- }
-}