// This module provides a data structure, `Ignore`, that connects "directory
// traversal" with "ignore matchers." Specifically, it knows about gitignore
// semantics and precedence, and is organized based on directory hierarchy.
// Namely, every matcher logically corresponds to ignore rules from a single
// directory, and points to the matcher for its corresponding parent directory.
// In this sense, `Ignore` is a *persistent* data structure.
//
// This design was specifically chosen to make it possible to use this data
// structure in a parallel directory iterator.
//
// My initial intention was to expose this module as part of this crate's
// public API, but I think the data structure's public API is too complicated
// with non-obvious failure modes. Alas, such things haven't been documented
// well.
use std::{
collections::HashMap,
ffi::{OsStr, OsString},
fs::{File, FileType},
io::{self, BufRead},
path::{Path, PathBuf},
sync::{Arc, RwLock},
};
use crate::{
gitignore::{self, Gitignore, GitignoreBuilder},
overrides::{self, Override},
pathutil::{is_hidden, strip_prefix},
types::{self, Types},
walk::DirEntry,
{Error, Match, PartialErrorBuilder},
};
/// IgnoreMatch represents information about where a match came from when using
/// the `Ignore` matcher.
#[derive(Clone, Debug)]
pub(crate) struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
/// IgnoreMatchInner describes precisely where the match information came from.
/// This is private to allow expansion to more matchers in the future.
#[derive(Clone, Debug)]
enum IgnoreMatchInner<'a> {
Override(overrides::Glob<'a>),
Gitignore(&'a gitignore::Glob),
Types(types::Glob<'a>),
Hidden,
}
impl<'a> IgnoreMatch<'a> {
fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> {
IgnoreMatch(IgnoreMatchInner::Override(x))
}
fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> {
IgnoreMatch(IgnoreMatchInner::Gitignore(x))
}
fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> {
IgnoreMatch(IgnoreMatchInner::Types(x))
}
fn hidden() -> IgnoreMatch<'static> {
IgnoreMatch(IgnoreMatchInner::Hidden)
}
}
/// Options for the ignore matcher, shared between the matcher itself and the
/// builder.
#[derive(Clone, Copy, Debug)]
struct IgnoreOptions {
/// Whether to ignore hidden file paths or not.
hidden: bool,
/// Whether to read .ignore files.
ignore: bool,
/// Whether to respect any ignore files in parent directories.
parents: bool,
/// Whether to read git's global gitignore file.
git_global: bool,
/// Whether to read .gitignore files.
git_ignore: bool,
/// Whether to read .git/info/exclude files.
git_exclude: bool,
/// Whether to ignore files case insensitively
ignore_case_insensitive: bool,
/// Whether a git repository must be present in order to apply any
/// git-related ignore rules.
require_git: bool,
}
/// Ignore is a matcher useful for recursively walking one or more directories.
#[derive(Clone, Debug)]
pub(crate) struct Ignore(Arc<IgnoreInner>);
#[derive(Clone, Debug)]
struct IgnoreInner {
/// A map of all existing directories that have already been
/// compiled into matchers.
///
/// Note that this is never used during matching, only when adding new
/// parent directory matchers. This avoids needing to rebuild glob sets for
/// parent directories if many paths are being searched.
compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
/// The path to the directory that this matcher was built from.
dir: PathBuf,
/// An override matcher (default is empty).
overrides: Arc<Override>,
/// A file type matcher.
types: Arc<Types>,
/// The parent directory to match next.
///
/// If this is the root directory or there are otherwise no more
/// directories to match, then `parent` is `None`.