use std::fs::File; use std::io; use std::path::{Path, PathBuf}; use encoding_rs::Encoding; use grep::Grep; use ignore::DirEntry; use memmap::Mmap; use termcolor::WriteColor; // use decoder::DecodeReader; use encoding_rs_io::DecodeReaderBytesBuilder; use decompressor::{self, DecompressionReader}; use preprocessor::PreprocessorReader; use pathutil::strip_prefix; use printer::Printer; use search_buffer::BufferSearcher; use search_stream::{InputBuffer, Searcher}; use Result; pub enum Work { Stdin, DirEntry(DirEntry), } pub struct WorkerBuilder { grep: Grep, opts: Options, } #[derive(Clone, Debug)] struct Options { mmap: bool, encoding: Option<&'static Encoding>, after_context: usize, before_context: usize, byte_offset: bool, count: bool, count_matches: bool, files_with_matches: bool, files_without_matches: bool, eol: u8, invert_match: bool, line_number: bool, max_count: Option, no_messages: bool, quiet: bool, text: bool, preprocessor: Option, search_zip_files: bool } impl Default for Options { fn default() -> Options { Options { mmap: false, encoding: None, after_context: 0, before_context: 0, byte_offset: false, count: false, count_matches: false, files_with_matches: false, files_without_matches: false, eol: b'\n', invert_match: false, line_number: false, max_count: None, no_messages: false, quiet: false, text: false, search_zip_files: false, preprocessor: None, } } } impl WorkerBuilder { /// Create a new builder for a worker. /// /// A reusable input buffer and a grep matcher are required, but there /// are numerous additional options that can be configured on this builder. pub fn new(grep: Grep) -> WorkerBuilder { WorkerBuilder { grep: grep, opts: Options::default(), } } /// Create the worker from this builder. pub fn build(self) -> Worker { let mut inpbuf = InputBuffer::new(); inpbuf.eol(self.opts.eol); Worker { grep: self.grep, inpbuf: inpbuf, decodebuf: vec![0; 8 * (1<<10)], opts: self.opts, } } /// The number of contextual lines to show after each match. The default /// is zero. pub fn after_context(mut self, count: usize) -> Self { self.opts.after_context = count; self } /// The number of contextual lines to show before each match. The default /// is zero. pub fn before_context(mut self, count: usize) -> Self { self.opts.before_context = count; self } /// If enabled, searching will print a 0-based offset of the /// matching line (or the actual match if -o is specified) before /// printing the line itself. /// /// Disabled by default. pub fn byte_offset(mut self, yes: bool) -> Self { self.opts.byte_offset = yes; self } /// If enabled, searching will print a count instead of each match. /// /// Disabled by default. pub fn count(mut self, yes: bool) -> Self { self.opts.count = yes; self } /// If enabled, searching will print the count of individual matches /// instead of each match. /// /// Disabled by default. pub fn count_matches(mut self, yes: bool) -> Self { self.opts.count_matches = yes; self } /// Set the encoding to use to read each file. /// /// If the encoding is `None` (the default), then the encoding is /// automatically detected on a best-effort per-file basis. pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self { self.opts.encoding = enc; self } /// If enabled, searching will print the path instead of each match. /// /// Disabled by default. pub fn files_with_matches(mut self, yes: bool) -> Self { self.opts.files_with_matches = yes; self } /// If enabled, searching will print the path of files without any matches. /// /// Disabled by default. pub fn files_without_matches(mut self, yes: bool) -> Self { self.opts.files_without_matches = yes; self } /// Set the end-of-line byte used by this searcher. pub fn eol(mut self, eol: u8) -> Self { self.opts.eol = eol; self } /// If enabled, matching is inverted so that lines that *don't* match the /// given pattern are treated as matches. pub fn invert_match(mut self, yes: bool) -> Self { self.opts.invert_match = yes; self } /// If enabled, compute line numbers and prefix each line of output with /// them. pub fn line_number(mut self, yes: bool) -> Self { self.opts.line_number = yes; self } /// Limit the number of matches to the given count. /// /// The default is None, which corresponds to no limit. pub fn max_count(mut self, count: Option) -> Self { self.opts.max_count = count; self } /// If enabled, try to use memory maps for searching if possible. pub fn mmap(mut self, yes: bool) -> Self { self.opts.mmap = yes; self } /// If enabled, error messages are suppressed. /// /// This is disabled by default. pub fn no_messages(mut self, yes: bool) -> Self { self.opts.no_messages = yes; self } /// If enabled, don't show any output and quit searching after the first /// match is found. pub fn quiet(mut self, yes: bool) -> Self { self.opts.quiet = yes; self } /// If enabled, search binary files as if they were text. pub fn text(mut self, yes: bool) -> Self { self.opts.text = yes; self } /// If enabled, search through compressed files as well pub fn search_zip_files(mut self, yes: bool) -> Self { self.opts.search_zip_files = yes; self } /// If non-empty, search output of preprocessor run on each file pub fn preprocessor(mut self, command: Option) -> Self { self.opts.preprocessor = command; self } } /// Worker is responsible for executing searches on file paths, while choosing /// streaming search or memory map search as appropriate. pub struct Worker { grep: Grep, inpbuf: InputBuffer, decodebuf: Vec, opts: Options, } impl Worker { /// Execute the worker with the given printer and work item. /// /// A work item can either be stdin or a file path. pub fn run( &mut self, printer: &mut Printer, work: Work, ) -> u64 { let result = match work { Work::Stdin => { let stdin = io::stdin(); let stdin = stdin.lock(); self.search(printer, Path::new(""), stdin) } Work::DirEntry(dent) => { let mut path = dent.path(); if self.opts.preprocessor.is_some() { let cmd = self.opts.preprocessor.clone().unwrap(); match PreprocessorReader::from_cmd_path(cmd, path) { Ok(reader) => self.search(printer, path, reader), Err(err) => { if !self.opts.no_messages { eprintln!("{}", err); } return 0; } } } else if self.opts.search_zip_files && decompressor::is_compressed(path) { match DecompressionReader::from_path(path) { Some(reader) => self.search(printer, path, reader), None => { return 0; } } } else { let file = match File::open(path) { Ok(file) => file, Err(err) => { if !self.opts.no_messages { eprintln!("{}: {}", path.display(), err); } return 0; } }; if let Some(p) = strip_prefix("./", path) { path = p; } if self.opts.mmap { self.search_mmap(printer, path, &file) } else { self.search(printer, path, file) } } } }; match result { Ok(count) => { count } Err(err) => { if !self.opts.no_messages { eprintln!("{}", err); } 0 } } } fn search( &mut self, printer: &mut Printer, path: &Path, rdr: R, ) -> Result { let rdr = DecodeReaderBytesBuilder::new() .encoding(self.opts.encoding) .utf8_passthru(true) .build_with_buffer(rdr, &mut self.decodebuf)?; let searcher = Searcher::new( &mut self.inpbuf, printer, &self.grep, path, rdr); searcher .after_context(self.opts.after_context) .before_context(self.opts.before_context) .byte_offset(self.opts.byte_offset) .count(self.opts.count) .count_matches(self.opts.count_matches) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) .eol(self.opts.eol) .line_number(self.opts.line_number) .invert_match(self.opts.invert_match) .max_count(self.opts.max_count) .quiet(self.opts.quiet) .text(self.opts.text) .run() .map_err(From::from) } fn search_mmap( &mut self, printer: &mut Printer, path: &Path, file: &File, ) -> Result { if file.metadata()?.len() == 0 { // Opening a memory map with an empty file results in an error. // However, this may not actually be an empty file! For example, // /proc/cpuinfo reports itself as an empty file, but it can // produce data when it's read from. Therefore, we fall back to // regular read calls. return self.search(printer, path, file); } let mmap = match self.mmap(file)? { None => return self.search(printer, path, file), Some(mmap) => mmap, }; let buf = &*mmap; if buf.len() >= 3 && Encoding::for_bom(buf).is_some() { // If we have a UTF-16 bom in our memory map, then we need to fall // back to the stream reader, which will do transcoding. return self.search(printer, path, file); } let searcher = BufferSearcher::new(printer, &self.grep, path, buf); Ok(searcher .byte_offset(self.opts.byte_offset) .count(self.opts.count) .count_matches(self.opts.count_matches) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) .eol(self.opts.eol) .line_number(self.opts.line_number) .invert_match(self.opts.invert_match) .max_count(self.opts.max_count) .quiet(self.opts.quiet) .text(self.opts.text) .run()) } #[cfg(not(unix))] fn mmap(&self, file: &File) -> Result> { Ok(Some(mmap_readonly(file)?)) } #[cfg(unix)] fn mmap(&self, file: &File) -> Result> { use libc::{EOVERFLOW, ENODEV, ENOMEM}; let err = match mmap_readonly(file) { Ok(mmap) => return Ok(Some(mmap)), Err(err) => err, }; let code = err.raw_os_error(); if code == Some(EOVERFLOW) || code == Some(ENODEV) || code == Some(ENOMEM) { return Ok(None); } Err(From::from(err)) } } fn mmap_readonly(file: &File) -> io::Result { unsafe { Mmap::map(file) } }