diff options
author | Andrew Gallant <jamslam@gmail.com> | 2016-09-05 10:15:13 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2016-09-05 10:15:13 -0400 |
commit | d8d7560fd082ce2f2e004b70c7993b4eaac25033 (patch) | |
tree | 5dc227a366fa17aa699ba51ee3d490654ff76056 /src | |
parent | 812cdb13c63441e2eddcaccdf632fdfb7c806a95 (diff) |
TODOs and some cleanup/refactoring.
Diffstat (limited to 'src')
-rw-r--r-- | src/gitignore.rs | 6 | ||||
-rw-r--r-- | src/glob.rs | 13 | ||||
-rw-r--r-- | src/main.rs | 88 | ||||
-rw-r--r-- | src/search.rs | 4 | ||||
-rw-r--r-- | src/walk.rs | 12 |
5 files changed, 70 insertions, 53 deletions
diff --git a/src/gitignore.rs b/src/gitignore.rs index 14229ca1..39508005 100644 --- a/src/gitignore.rs +++ b/src/gitignore.rs @@ -118,14 +118,8 @@ impl Gitignore { /// of the directory containing this gitignore) is stripped. If there is /// no common suffix/prefix overlap, then path is assumed to reside in the /// same directory as this gitignore file. - /// - /// If the given path has a `./` prefix then it is stripped before - /// matching. pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match { let mut path = path.as_ref(); - if let Ok(p) = path.strip_prefix("./") { - path = p; - } if let Ok(p) = path.strip_prefix(&self.root) { path = p; } diff --git a/src/glob.rs b/src/glob.rs index c560facb..01fdd3e0 100644 --- a/src/glob.rs +++ b/src/glob.rs @@ -13,6 +13,19 @@ that rigamorole when I wrote this. In particular, it could be fast/good enough to make its way into `glob` proper. */ +// TODO(burntsushi): I'm pretty dismayed by the performance of regex sets +// here. For example, we do a first pass single-regex-of-all-globs filter +// before actually running the regex set. This turns out to be faster, +// especially in fresh checkouts of repos that don't have a lot of ignored +// files. It's not clear how hard it is to make the regex set faster. +// +// An alternative avenue is to stop doing "regex all the things." (Which, to +// be fair, is pretty fast---I just expected it to be faster.) We could do +// something clever using assumptions along the lines of "oh, most ignore +// patterns are either literals or are for ignoring file extensions." (Look +// at the .gitignore for the chromium repo---just about every pattern satisfies +// that assumption.) + use std::error::Error as StdError; use std::fmt; use std::iter; diff --git a/src/main.rs b/src/main.rs index 89b003db..0b843eba 100644 --- a/src/main.rs +++ b/src/main.rs @@ -22,7 +22,7 @@ extern crate walkdir; use std::error::Error; use std::fs::File; use std::io::{self, Write}; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::process; use std::result; use std::sync::Arc; @@ -31,6 +31,7 @@ use std::thread; use crossbeam::sync::chase_lev::{self, Steal, Stealer}; use grep::Grep; use parking_lot::Mutex; +use walkdir::DirEntry; use args::Args; use out::Out; @@ -94,6 +95,7 @@ fn run(args: Args) -> Result<u64> { inpbuf: args.input_buffer(), outbuf: Some(vec![]), grep: try!(args.grep()), + match_count: 0, }; workers.push(thread::spawn(move || worker.run())); } @@ -103,8 +105,8 @@ fn run(args: Args) -> Result<u64> { if p == Path::new("-") { workq.push(Work::Stdin) } else { - for path in args.walker(p) { - workq.push(Work::File(path)); + for ent in args.walker(p) { + workq.push(Work::File(ent)); } } } @@ -126,8 +128,8 @@ fn run_files(args: Args) -> Result<u64> { printer.path(&Path::new("<stdin>")); file_count += 1; } else { - for path in args.walker(p) { - printer.path(path); + for ent in args.walker(p) { + printer.path(ent.path()); file_count += 1; } } @@ -146,11 +148,16 @@ fn run_types(args: Args) -> Result<u64> { } enum Work { - File(PathBuf), Stdin, + File(DirEntry), Quit, } +enum WorkReady { + Stdin, + File(DirEntry, File), +} + struct Worker { args: Arc<Args>, out: Arc<Mutex<Out<io::Stdout>>>, @@ -158,51 +165,31 @@ struct Worker { inpbuf: InputBuffer, outbuf: Option<Vec<u8>>, grep: Grep, + match_count: u64, } impl Worker { fn run(mut self) -> u64 { - let mut match_count = 0; + self.match_count = 0; loop { - let (path, file) = match self.chan_work.steal() { + let work = match self.chan_work.steal() { Steal::Empty | Steal::Abort => continue, Steal::Data(Work::Quit) => break, - Steal::Data(Work::File(path)) => { - match File::open(&path) { - Ok(file) => (path, Some(file)), + Steal::Data(Work::Stdin) => WorkReady::Stdin, + Steal::Data(Work::File(ent)) => { + match File::open(ent.path()) { + Ok(file) => WorkReady::File(ent, file), Err(err) => { - eprintln!("{}: {}", path.display(), err); + eprintln!("{}: {}", ent.path().display(), err); continue; } } } - Steal::Data(Work::Stdin) => { - (Path::new("<stdin>").to_path_buf(), None) - } }; let mut outbuf = self.outbuf.take().unwrap(); outbuf.clear(); let mut printer = self.args.printer(outbuf); - { - let result = match file { - None => { - let stdin = io::stdin(); - let stdin = stdin.lock(); - self.search(&mut printer, &path, stdin) - } - Some(file) => { - self.search(&mut printer, &path, file) - } - }; - match result { - Ok(count) => { - match_count += count; - } - Err(err) => { - eprintln!("{}", err); - } - } - } + self.do_work(&mut printer, work); let outbuf = printer.into_inner(); if !outbuf.is_empty() { let mut out = self.out.lock(); @@ -210,7 +197,36 @@ impl Worker { } self.outbuf = Some(outbuf); } - match_count + self.match_count + } + + fn do_work<W: io::Write>( + &mut self, + printer: &mut Printer<W>, + work: WorkReady, + ) { + let result = match work { + WorkReady::Stdin => { + let stdin = io::stdin(); + let stdin = stdin.lock(); + self.search(printer, &Path::new("<stdin>"), stdin) + } + WorkReady::File(ent, file) => { + let mut path = ent.path(); + if let Ok(p) = path.strip_prefix("./") { + path = p; + } + self.search(printer, path, file) + } + }; + match result { + Ok(count) => { + self.match_count += count; + } + Err(err) => { + eprintln!("{}", err); + } + } } fn search<R: io::Read, W: io::Write>( diff --git a/src/search.rs b/src/search.rs index ea5602d1..d0efb7ae 100644 --- a/src/search.rs +++ b/src/search.rs @@ -739,7 +739,7 @@ fn main() { mut map: F, ) -> (u64, String) { let mut inp = InputBuffer::with_capacity(1); - let mut pp = Printer::new(vec![]); + let mut pp = Printer::new(vec![]).with_filename(true); let grep = GrepBuilder::new(pat).build().unwrap(); let count = { let searcher = Searcher::new( @@ -755,7 +755,7 @@ fn main() { mut map: F, ) -> (u64, String) { let mut inp = InputBuffer::with_capacity(4096); - let mut pp = Printer::new(vec![]); + let mut pp = Printer::new(vec![]).with_filename(true); let grep = GrepBuilder::new(pat).build().unwrap(); let count = { let searcher = Searcher::new( diff --git a/src/walk.rs b/src/walk.rs index 524e6f0b..e60e2605 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -4,8 +4,6 @@ crate that can efficiently skip and ignore files and directories specified in a user's ignore patterns. */ -use std::path::PathBuf; - use walkdir::{self, DirEntry, WalkDir, WalkDirIterator}; use ignore::Ignore; @@ -41,9 +39,9 @@ impl Iter { } impl Iterator for Iter { - type Item = PathBuf; + type Item = DirEntry; - fn next(&mut self) -> Option<PathBuf> { + fn next(&mut self) -> Option<DirEntry> { while let Some(ev) = self.it.next() { match ev { Err(err) => { @@ -76,11 +74,7 @@ impl Iterator for Iter { if !ent.file_type().is_file() { continue; } - let mut path = ent.path(); - if let Ok(p) = path.strip_prefix("./") { - path = p; - } - return Some(path.to_path_buf()); + return Some(ent); } } } |