use std::io;
use std::path::{Path, PathBuf};
use std::time::Duration;

use grep::matcher::Matcher;
#[cfg(feature = "pcre2")]
use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
use grep::printer::{JSON, Standard, Summary, Stats};
use grep::regex::{RegexMatcher as RustRegexMatcher};
use grep::searcher::Searcher;
use serde_json as json;
use termcolor::WriteColor;

use decompressor::{DecompressionReader, is_compressed};
use preprocessor::PreprocessorReader;
use subject::Subject;

/// The configuration for the search worker. Among a few other things, the
/// configuration primarily controls the way we show search results to users
/// at a very high level.
#[derive(Clone, Debug)]
struct Config {
    /// When true, statistics are always emitted as JSON.
    json_stats: bool,
    /// The preprocessor command to run on each file, if any.
    preprocessor: Option<PathBuf>,
    /// When true, recognized compressed files are decompressed before
    /// being searched.
    search_zip: bool,
}

impl Default for Config {
    /// Every optional behavior starts out disabled.
    fn default() -> Config {
        Config {
            json_stats: false,
            preprocessor: None,
            search_zip: false,
        }
    }
}

/// A builder for configuring and constructing a search worker.
#[derive(Clone, Debug)]
pub struct SearchWorkerBuilder {
    config: Config,
}

impl Default for SearchWorkerBuilder {
    fn default() -> SearchWorkerBuilder {
        SearchWorkerBuilder::new()
    }
}

impl SearchWorkerBuilder {
    /// Create a new builder for configuring and constructing a search worker.
    pub fn new() -> SearchWorkerBuilder {
        SearchWorkerBuilder { config: Config::default() }
    }

    /// Create a new search worker using the given searcher, matcher and
    /// printer.
    ///
    /// The builder's configuration is cloned into the worker, so the same
    /// builder can be used to build any number of workers.
    pub fn build<W: WriteColor>(
        &self,
        matcher: PatternMatcher,
        searcher: Searcher,
        printer: Printer<W>,
    ) -> SearchWorker<W> {
        let config = self.config.clone();
        SearchWorker { config, matcher, searcher, printer }
    }

    /// Forcefully use JSON to emit statistics, even if the underlying printer
    /// is not the JSON printer.
    ///
    /// This is useful for implementing flag combinations like
    /// `--json --quiet`, which uses the summary printer for implementing
    /// `--quiet` but still wants to emit summary statistics, which should
    /// be JSON formatted because of the `--json` flag.
    pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
        self.config.json_stats = yes;
        self
    }

    /// Set the path to a preprocessor command.
    ///
    /// When this is set, instead of searching files directly, the given
    /// command will be run with the file path as the first argument, and the
    /// output of that command will be searched instead.
    pub fn preprocessor(
        &mut self,
        cmd: Option<PathBuf>,
    ) -> &mut SearchWorkerBuilder {
        self.config.preprocessor = cmd;
        self
    }

    /// Enable the decompression and searching of common compressed files.
    ///
    /// When enabled, if a particular file path is recognized as a compressed
    /// file, then it is decompressed before searching.
    ///
    /// Note that if a preprocessor command is set, then it overrides this
    /// setting.
    pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
        self.config.search_zip = yes;
        self
    }
}

/// The result of executing a search.
///
/// Generally speaking, the "result" of a search is sent to a printer, which
/// writes results to an underlying writer such as stdout or a file. However,
/// every search also has some aggregate statistics or meta data that may be
/// useful to higher level routines.
#[derive(Clone, Debug, Default)]
pub struct SearchResult {
    has_match: bool,
    stats: Option<Stats>,
}

impl SearchResult {
    /// Whether the search found a match or not.
    pub fn has_match(&self) -> bool {
        self.has_match
    }

    /// Return aggregate search statistics for a single search, if available.
    ///
    /// It can be expensive to compute statistics, so these are only present
    /// if explicitly enabled in the printer provided by the caller.
    pub fn stats(&self) -> Option<&Stats> {
        self.stats.as_ref()
    }
}

/// The pattern matcher used by a search worker.
#[derive(Clone, Debug)]
pub enum PatternMatcher {
    /// A matcher backed by the `RegexMatcher` from `grep::regex`.
    RustRegex(RustRegexMatcher),
    /// A matcher backed by the `RegexMatcher` from `grep::pcre2`. Only
    /// available when the `pcre2` feature is enabled.
    #[cfg(feature = "pcre2")]
    PCRE2(PCRE2RegexMatcher),
}

/// The printer used by a search worker.
///
/// The `W` type parameter refers to the type of the underlying writer.
#[derive(Debug)] pub enum Printer { /// Use the standard printer, which supports the classic grep-like format. Standard(Standard), /// Use the summary printer, which supports aggregate displays of search /// results. Summary(Summary), /// A JSON printer, which emits results in the JSON Lines format. JSON(JSON), } impl Printer { fn print_stats( &mut self, total_duration: Duration, stats: &Stats, ) -> io::Result<()> { match *self { Printer::JSON(_) => { self.print_stats_json(total_duration, stats) } Printer::Standard(_) | Printer::Summary(_) => { self.print_stats_human(total_duration, stats) } } } fn print_stats_human( &mut self, total_duration: Duration, stats: &Stats, ) -> io::Result<()> { write!( self.get_mut(), " {matches} matches {lines} matched lines {searches_with_match} files contained matches {searches} files searched {bytes_printed} bytes printed {bytes_searched} bytes searched {search_time:0.6} seconds spent searching {process_time:0.6} seconds ", matches = stats.matches(), lines = stats.matched_lines(), searches_with_match = stats.searches_with_match(), searches = stats.searches(), bytes_printed = stats.bytes_printed(), bytes_searched = stats.bytes_searched(), search_time = fractional_seconds(stats.elapsed()), process_time = fractional_seconds(total_duration) ) } fn print_stats_json( &mut self, total_duration: Duration, stats: &Stats, ) -> io::Result<()> { // We specifically match the format laid out by the JSON printer in // the grep-printer crate. We simply "extend" it with the 'summary' // message type. let fractional = fractional_seconds(total_duration); json::to_writer(self.get_mut(), &json!({ "type": "summary", "data": { "stats": stats, "elapsed_total": { "secs": total_duration.as_secs(), "nanos": total_duration.subsec_nanos(), "human": format!("{:0.6}s", fractional), }, } }))?; write!(self.get_mut(), "\n") } /// Return a mutable reference to the underlying printer's writer. 
pub fn get_mut(&mut self) -> &mut W { match *self { Printer::Standard(ref mut p) => p.get_mut(), Printer::Summary(ref mut p) => p.get_mut(), Printer::JSON(ref mut p) => p.get_mut(), } } } /// A worker for executing searches. /// /// It is intended for a single worker to execute many searches, and is /// generally intended to be used from a single thread. When searching using /// multiple threads, it is better to create a new worker for each thread. #[derive(Debug)] pub struct SearchWorker { config: Config, matcher: PatternMatcher, searcher: Searcher, printer: Printer, } impl SearchWorker { /// Execute a search over the given subject. pub fn search(&mut self, subject: &Subject) -> io::Result { self.search_impl(subject) } /// Return a mutable reference to the underlying printer. pub fn printer(&mut self) -> &mut Printer { &mut self.printer } /// Print the given statistics to the underlying writer in a way that is /// consistent with this searcher's printer's format. /// /// While `Stats` contains a duration itself, this only corresponds to the /// time spent searching, where as `total_duration` should roughly /// approximate the lifespan of the ripgrep process itself. pub fn print_stats( &mut self, total_duration: Duration, stats: &Stats, ) -> io::Result<()> { if self.config.json_stats { self.printer().print_stats_json(total_duration, stats) } else { self.printer().print_stats(total_duration, stats) } } /// Search the given subject using the appropriate strategy. fn search_impl(&mut self, subject: &Subject) -> io::Result { let path = subject.path(); if subject.is_stdin() { let stdin = io::stdin(); // A `return` here appeases the borrow checker. NLL will fix this. 
return self.search_reader(path, stdin.lock()); } else if self.config.preprocessor.is_some() { let cmd = self.config.preprocessor.clone().unwrap(); let rdr = PreprocessorReader::from_cmd_path(cmd, path)?; self.search_reader(path, rdr) } else if self.config.search_zip && is_compressed(path) { match DecompressionReader::from_path(path) { None => Ok(SearchResult::default()), Some(rdr) => self.search_reader(path, rdr), } } else { self.search_path(path) } } /// Search the contents of the given file path. fn search_path(&mut self, path: &Path) -> io::Result { use self::PatternMatcher::*; let (searcher, printer) = (&mut self.searcher, &mut self.printer); match self.matcher { RustRegex(ref m) => search_path(m, searcher, printer, path), #[cfg(feature = "pcre2")] PCRE2(ref m) => search_path(m, searcher, printer, path), } } /// Executes a search on the given reader, which may or may not correspond /// directly to the contents of the given file path. Instead, the reader /// may actually cause something else to be searched (for example, when /// a preprocessor is set or when decompression is enabled). In those /// cases, the file path is used for visual purposes only. /// /// Generally speaking, this method should only be used when there is no /// other choice. Searching via `search_path` provides more opportunities /// for optimizations (such as memory maps). fn search_reader( &mut self, path: &Path, rdr: R, ) -> io::Result { use self::PatternMatcher::*; let (searcher, printer) = (&mut self.searcher, &mut self.printer); match self.matcher { RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), #[cfg(feature = "pcre2")] PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr), } } } /// Search the contents of the given file path using the given matcher, /// searcher and printer. 
fn search_path( matcher: M, searcher: &mut Searcher, printer: &mut Printer, path: &Path, ) -> io::Result { match *printer { Printer::Standard(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_path(&matcher, path, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: sink.stats().map(|s| s.clone()), }) } Printer::Summary(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_path(&matcher, path, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: sink.stats().map(|s| s.clone()), }) } Printer::JSON(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_path(&matcher, path, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: Some(sink.stats().clone()), }) } } } /// Search the contents of the given reader using the given matcher, searcher /// and printer. fn search_reader( matcher: M, searcher: &mut Searcher, printer: &mut Printer, path: &Path, rdr: R, ) -> io::Result { match *printer { Printer::Standard(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_reader(&matcher, rdr, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: sink.stats().map(|s| s.clone()), }) } Printer::Summary(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_reader(&matcher, rdr, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: sink.stats().map(|s| s.clone()), }) } Printer::JSON(ref mut p) => { let mut sink = p.sink_with_path(&matcher, path); searcher.search_reader(&matcher, rdr, &mut sink)?; Ok(SearchResult { has_match: sink.has_match(), stats: Some(sink.stats().clone()), }) } } } /// Return the given duration as fractional seconds. fn fractional_seconds(duration: Duration) -> f64 { (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) }