From 0d14c74e634563c29877a33db3dc6f56ea16a95d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 16 Sep 2016 16:13:28 -0400 Subject: Some minor performance tweaks. This includes moving basename-only globs into separate regexes. The hope is that if the regex processes less input, it will be faster. --- Cargo.lock | 12 ++++++------ src/glob.rs | 37 ++++++++++++++++++++++++++++++++++++- src/ignore.rs | 9 +-------- src/pathutil.rs | 32 +++++++++++++++++++++++++------- src/walk.rs | 3 +++ 5 files changed, 71 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96da64f0..12e6d8e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,11 +14,11 @@ dependencies = [ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -134,7 +134,7 @@ dependencies = [ [[package]] name = "num_cpus" -version = "1.0.0" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", @@ -214,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -245,7 +245,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752" -"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f" +"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" @@ -256,6 +256,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" -"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9" +"checksum walkdir 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "5e415f89803a053390d21ecb49244deb5b30fb34aeec4a38badb747c83a4c668" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/src/glob.rs b/src/glob.rs index e7da9322..bb033d69 100644 --- a/src/glob.rs +++ b/src/glob.rs @@ -124,6 +124,8 @@ pub struct Set { base_prefixes_map: Vec, base_suffixes: Vec>, base_suffixes_map: Vec, + base_regexes: RegexSet, + base_regexes_map: Vec, regexes: RegexSet, regexes_map: Vec, } @@ -195,7 +197,14 @@ impl Set { } } } - into.extend(self.regexes.matches(path_bytes)); + if let Some(ref basename) = basename { + for i in self.base_regexes.matches(&**basename) { + into.push(self.base_regexes_map[i]); + } + } + for i in self.regexes.matches(path_bytes) { + into.push(self.regexes_map[i]); + } into.sort(); } @@ -207,6 +216,7 @@ impl Set { let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]); let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]); let (mut regexes, mut regexes_map) = (vec![], vec![]); + let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]); for (i, &(ref p, ref o)) in pats.iter().enumerate() { if let Some(ext) = p.ext() { exts.entry(ext).or_insert(vec![]).push(i); @@ -221,6 +231,10 @@ impl Set { } else if let Some(literal) = p.base_literal_suffix() { base_suffixes.push(literal.into_bytes()); base_suffixes_map.push(i); + } else if p.is_only_basename() { + let part = format!("(?:{})", p.to_regex_with(o)); + base_regexes.push(part); + base_regexes_map.push(i); } else { let part = format!("(?:{})", p.to_regex_with(o)); regexes.push(part); @@ -236,6 +250,8 @@ impl Set { base_prefixes_map: base_prefixes_map, base_suffixes: base_suffixes, base_suffixes_map: base_suffixes_map, + base_regexes: try!(RegexSet::new(base_regexes)), + base_regexes_map: base_regexes_map, regexes: try!(RegexSet::new(regexes)), regexes_map: regexes_map, }) @@ -402,6 +418,25 @@ impl Pattern { Some(lit) } + /// Returns true if and only if this pattern only inspects the basename + /// of a path. + pub fn is_only_basename(&self) -> bool { + match self.tokens.get(0) { + Some(&Token::RecursivePrefix) => {} + _ => return false, + } + for t in &self.tokens[1..] { + match *t { + Token::Literal(c) if c == '/' || c == '\\' => return false, + Token::RecursivePrefix + | Token::RecursiveSuffix + | Token::RecursiveZeroOrMore => return false, + _ => {} + } + } + true + } + /// Returns the pattern as a literal if and only if the pattern must match /// an entire path exactly. /// diff --git a/src/ignore.rs b/src/ignore.rs index 10a81886..d784f994 100644 --- a/src/ignore.rs +++ b/src/ignore.rs @@ -19,6 +19,7 @@ use std::io; use std::path::{Path, PathBuf}; use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; +use pathutil::is_hidden; use types::Types; const IGNORE_NAMES: &'static [&'static str] = &[ @@ -377,14 +378,6 @@ impl Overrides { } } -fn is_hidden>(path: P) -> bool { - if let Some(name) = path.as_ref().file_name() { - name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) - } else { - false - } -} - #[cfg(test)] mod tests { use std::path::Path; diff --git a/src/pathutil.rs b/src/pathutil.rs index 3cc92f7b..01342ac0 100644 --- a/src/pathutil.rs +++ b/src/pathutil.rs @@ -11,6 +11,8 @@ improvement on just listing the files to search (!). use std::ffi::OsStr; use std::path::Path; +use memchr::memrchr; + /// Strip `prefix` from the `path` and return the remainder. /// /// If `path` doesn't have a prefix `prefix`, then return `None`. @@ -58,13 +60,7 @@ pub fn file_name<'a, P: AsRef + ?Sized>( } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { return None; } - let mut last_slash = 0; - for (i, &b) in path.iter().enumerate().rev() { - if b == b'/' { - last_slash = i + 1; - break; - } - } + let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); Some(OsStr::from_bytes(&path[last_slash..])) } @@ -78,3 +74,25 @@ pub fn file_name<'a, P: AsRef + ?Sized>( ) -> Option<&'a OsStr> { path.as_ref().file_name() } + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(unix)] +pub fn is_hidden>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + + if let Some(name) = file_name(path.as_ref()) { + name.as_bytes().get(0) == Some(&b'.') + } else { + false + } +} + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(not(unix))] +pub fn is_hidden>(path: P) -> bool { + if let Some(name) = file_name(path) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} diff --git a/src/walk.rs b/src/walk.rs index b100802a..f661c4cf 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -26,6 +26,7 @@ impl Iter { } /// Returns true if this entry should be skipped. + #[inline(always)] fn skip_entry(&self, ent: &DirEntry) -> bool { if ent.depth() == 0 { // Never skip the root directory. @@ -41,6 +42,7 @@ impl Iter { impl Iterator for Iter { type Item = DirEntry; + #[inline(always)] fn next(&mut self) -> Option { while let Some(ev) = self.it.next() { match ev { @@ -108,6 +110,7 @@ impl From for WalkEventIter { impl Iterator for WalkEventIter { type Item = walkdir::Result; + #[inline(always)] fn next(&mut self) -> Option> { let dent = self.next.take().or_else(|| self.it.next()); let depth = match dent { -- cgit v1.2.3