summary refs log tree commit diff stats
path: root/src/search.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/search.rs')
-rw-r--r-- src/search.rs 408
1 files changed, 408 insertions, 0 deletions
diff --git a/src/search.rs b/src/search.rs
new file mode 100644
index 00000000..45f7cf87
--- /dev/null
+++ b/src/search.rs
@@ -0,0 +1,408 @@
+use std::io;
+use std::path::{Path, PathBuf};
+use std::time::Duration;
+
+use grep::matcher::Matcher;
+#[cfg(feature = "pcre2")]
+use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
+use grep::printer::{JSON, Standard, Summary, Stats};
+use grep::regex::{RegexMatcher as RustRegexMatcher};
+use grep::searcher::Searcher;
+use serde_json as json;
+use termcolor::WriteColor;
+
+use decompressor::{DecompressionReader, is_compressed};
+use preprocessor::PreprocessorReader;
+use subject::Subject;
+
+/// The configuration for the search worker.
+///
+/// Among a few other things, the configuration primarily controls the way we
+/// show search results to users at a very high level.
+///
+/// The derived `Default` yields `json_stats: false`, `preprocessor: None`
+/// and `search_zip: false`.
+#[derive(Clone, Debug, Default)]
+struct Config {
+    /// Emit statistics as JSON even when the printer is not the JSON printer.
+    json_stats: bool,
+    /// Path to a preprocessor command, if one has been configured.
+    preprocessor: Option<PathBuf>,
+    /// Whether recognized compressed files are decompressed before searching.
+    search_zip: bool,
+}
+
+/// Builder that accumulates a `Config` and then constructs search workers.
+#[derive(Clone, Debug)]
+pub struct SearchWorkerBuilder {
+    /// The settings handed to the workers this builder produces.
+    config: Config,
+}
+
+impl Default for SearchWorkerBuilder {
+    /// Equivalent to `SearchWorkerBuilder::new()`.
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl SearchWorkerBuilder {
+    /// Return a builder that starts from the default configuration.
+    pub fn new() -> SearchWorkerBuilder {
+        let config = Config::default();
+        SearchWorkerBuilder { config }
+    }
+
+    /// Assemble a search worker from the given matcher, searcher and
+    /// printer.
+    ///
+    /// The worker receives its own clone of this builder's configuration,
+    /// so the builder itself is left untouched.
+    pub fn build<W: WriteColor>(
+        &self,
+        matcher: PatternMatcher,
+        searcher: Searcher,
+        printer: Printer<W>,
+    ) -> SearchWorker<W> {
+        SearchWorker {
+            config: self.config.clone(),
+            matcher: matcher,
+            searcher: searcher,
+            printer: printer,
+        }
+    }
+
+    /// Force statistics to be emitted as JSON, regardless of which printer
+    /// the worker uses.
+    ///
+    /// This exists for flag combinations such as `--json --quiet`: the
+    /// summary printer implements `--quiet`, yet the summary statistics
+    /// must still be JSON formatted because `--json` was given.
+    pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
+        let config = &mut self.config;
+        config.json_stats = yes;
+        self
+    }
+
+    /// Set (or, with `None`, clear) the path to a preprocessor command.
+    ///
+    /// When a command is set, files are not searched directly. Instead the
+    /// command is run with the file path as its first argument, and the
+    /// output of that command is what gets searched.
+    pub fn preprocessor(
+        &mut self,
+        cmd: Option<PathBuf>,
+    ) -> &mut SearchWorkerBuilder {
+        let config = &mut self.config;
+        config.preprocessor = cmd;
+        self
+    }
+
+    /// Toggle decompression and searching of common compressed files.
+    ///
+    /// When enabled, a file path that is recognized as a compressed file is
+    /// decompressed before it is searched.
+    ///
+    /// A preprocessor command, if one is set, takes precedence over this
+    /// setting.
+    pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
+        let config = &mut self.config;
+        config.search_zip = yes;
+        self
+    }
+}
+
+/// The outcome of a single search.
+///
+/// The matches themselves go to a printer, which writes them to an
+/// underlying writer such as stdout or a file. This type carries the
+/// aggregate information about the search that higher level routines may
+/// want to inspect afterwards.
+#[derive(Clone, Debug, Default)]
+pub struct SearchResult {
+    /// True when the search reported at least one match.
+    has_match: bool,
+    /// Statistics gathered by the printer, when it was set up to do so.
+    stats: Option<Stats>,
+}
+
+impl SearchResult {
+    /// Report whether the search found a match.
+    pub fn has_match(&self) -> bool {
+        let SearchResult { has_match, .. } = *self;
+        has_match
+    }
+
+    /// Borrow the aggregate statistics for this search, if there are any.
+    ///
+    /// Statistics can be expensive to compute, so they are only present
+    /// when explicitly enabled in the printer provided by the caller.
+    pub fn stats(&self) -> Option<&Stats> {
+        match self.stats {
+            Some(ref stats) => Some(stats),
+            None => None,
+        }
+    }
+}
+
+/// The pattern matcher used by a search worker, one variant per regex engine.
+#[derive(Clone, Debug)]
+pub enum PatternMatcher {
+ RustRegex(RustRegexMatcher), // `grep::regex::RegexMatcher`
+ #[cfg(feature = "pcre2")]
+ PCRE2(PCRE2RegexMatcher), // `grep::pcre2::RegexMatcher`; only compiled with the `pcre2` feature
+}
+
+/// The printer used by a search worker; each variant wraps one output format.
+///
+/// The `W` type parameter is the type of the underlying writer.
+#[derive(Debug)]
+pub enum Printer<W> {
+ /// The standard printer, which supports the classic grep-like format.
+ Standard(Standard<W>), // `grep::printer::Standard`
+ /// The summary printer, which supports aggregate displays of search
+ /// results.
+ Summary(Summary<W>), // `grep::printer::Summary`
+ /// The JSON printer, which emits results in the JSON Lines format.
+ JSON(JSON<W>), // `grep::printer::JSON`
+}
+
+impl<W: WriteColor> Printer<W> {
+    /// Write the given statistics to the underlying writer in the format
+    /// that goes with this printer: JSON for the JSON printer, and the
+    /// human readable listing for the standard and summary printers.
+    fn print_stats(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        let wants_json = match *self {
+            Printer::JSON(_) => true,
+            Printer::Standard(_) | Printer::Summary(_) => false,
+        };
+        if wants_json {
+            self.print_stats_json(total_duration, stats)
+        } else {
+            self.print_stats_human(total_duration, stats)
+        }
+    }
+
+    /// Write the statistics as a plain text listing, one figure per line,
+    /// preceded by an empty line.
+    ///
+    /// Both durations are shown as fractional seconds with six digits after
+    /// the decimal point.
+    fn print_stats_human(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        let search_secs = fractional_seconds(stats.elapsed());
+        let process_secs = fractional_seconds(total_duration);
+        write!(
+            self.get_mut(),
+            "
+{matches} matches
+{lines} matched lines
+{searches_with_match} files contained matches
+{searches} files searched
+{bytes_printed} bytes printed
+{bytes_searched} bytes searched
+{search_time:0.6} seconds spent searching
+{process_time:0.6} seconds
+",
+            matches = stats.matches(),
+            lines = stats.matched_lines(),
+            searches_with_match = stats.searches_with_match(),
+            searches = stats.searches(),
+            bytes_printed = stats.bytes_printed(),
+            bytes_searched = stats.bytes_searched(),
+            search_time = search_secs,
+            process_time = process_secs
+        )
+    }
+
+    /// Write the statistics as a single JSON object followed by a newline.
+    fn print_stats_json(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        // The shape deliberately follows the format laid out by the JSON
+        // printer in the grep-printer crate; it is merely "extended" with
+        // the 'summary' message type.
+        let human = format!("{:0.6}s", fractional_seconds(total_duration));
+        let message = json!({
+            "type": "summary",
+            "data": {
+                "stats": stats,
+                "elapsed_total": {
+                    "secs": total_duration.as_secs(),
+                    "nanos": total_duration.subsec_nanos(),
+                    "human": human,
+                },
+            }
+        });
+        json::to_writer(self.get_mut(), &message)?;
+        write!(self.get_mut(), "\n")
+    }
+
+    /// Return a mutable reference to the underlying printer's writer.
+    pub fn get_mut(&mut self) -> &mut W {
+        match *self {
+            Printer::Standard(ref mut standard) => standard.get_mut(),
+            Printer::Summary(ref mut summary) => summary.get_mut(),
+            Printer::JSON(ref mut json_printer) => json_printer.get_mut(),
+        }
+    }
+}
+
+/// A worker for executing searches.
+///
+/// A single worker is meant to execute many searches and is generally
+/// intended to be used from a single thread. When searching using multiple
+/// threads, it is better to create a new worker for each thread.
+#[derive(Debug)]
+pub struct SearchWorker<W> {
+ config: Config, // worker settings: JSON stats, preprocessor, decompression
+ matcher: PatternMatcher, // the regex engine handed to every search
+ searcher: Searcher, // the `grep::searcher::Searcher` that runs each search
+ printer: Printer<W>, // receives search results; also prints statistics
+}
+
+impl<W: WriteColor> SearchWorker<W> {
+    /// Execute a search over the given subject.
+    ///
+    /// On success, the result says whether a match was found and carries
+    /// any statistics the printer collected. An I/O error raised while
+    /// setting up or running the search is returned to the caller.
+    pub fn search(&mut self, subject: &Subject) -> io::Result<SearchResult> {
+        self.search_impl(subject)
+    }
+
+    /// Return a mutable reference to the underlying printer.
+    pub fn printer(&mut self) -> &mut Printer<W> {
+        &mut self.printer
+    }
+
+    /// Print the given statistics to the underlying writer in a way that is
+    /// consistent with this searcher's printer's format.
+    ///
+    /// If JSON statistics were forced through the builder, the JSON format
+    /// is used no matter which printer is active.
+    ///
+    /// While `Stats` contains a duration itself, this only corresponds to the
+    /// time spent searching, whereas `total_duration` should roughly
+    /// approximate the lifespan of the ripgrep process itself.
+    pub fn print_stats(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        if self.config.json_stats {
+            self.printer().print_stats_json(total_duration, stats)
+        } else {
+            self.printer().print_stats(total_duration, stats)
+        }
+    }
+
+    /// Search the given subject using the appropriate strategy.
+    ///
+    /// The strategies are tried in this order: stdin, the preprocessor
+    /// command (if one is set), decompression (if enabled and the path is
+    /// recognized as compressed), and finally a direct search of the path.
+    fn search_impl(&mut self, subject: &Subject) -> io::Result<SearchResult> {
+        let path = subject.path();
+        if subject.is_stdin() {
+            let stdin = io::stdin();
+            // A `return` here appeases the borrow checker. NLL will fix this.
+            return self.search_reader(path, stdin.lock());
+        } else if let Some(cmd) = self.config.preprocessor.clone() {
+            // The command path is cloned so that no borrow of `self.config`
+            // is alive while `search_reader` borrows `self` mutably.
+            let rdr = PreprocessorReader::from_cmd_path(cmd, path)?;
+            self.search_reader(path, rdr)
+        } else if self.config.search_zip && is_compressed(path) {
+            match DecompressionReader::from_path(path) {
+                // No reader could be produced for this path: report an
+                // empty result instead of failing the whole search.
+                None => Ok(SearchResult::default()),
+                Some(rdr) => self.search_reader(path, rdr),
+            }
+        } else {
+            self.search_path(path)
+        }
+    }
+
+    /// Search the contents of the given file path.
+    fn search_path(&mut self, path: &Path) -> io::Result<SearchResult> {
+        use self::PatternMatcher::*;
+
+        // Borrow the fields separately so that the matcher can be borrowed
+        // immutably at the same time.
+        let (searcher, printer) = (&mut self.searcher, &mut self.printer);
+        match self.matcher {
+            RustRegex(ref m) => search_path(m, searcher, printer, path),
+            #[cfg(feature = "pcre2")]
+            PCRE2(ref m) => search_path(m, searcher, printer, path),
+        }
+    }
+
+    /// Executes a search on the given reader, which may or may not correspond
+    /// directly to the contents of the given file path. Instead, the reader
+    /// may actually cause something else to be searched (for example, when
+    /// a preprocessor is set or when decompression is enabled). In those
+    /// cases, the file path is used for visual purposes only.
+    ///
+    /// Generally speaking, this method should only be used when there is no
+    /// other choice. Searching via `search_path` provides more opportunities
+    /// for optimizations (such as memory maps).
+    fn search_reader<R: io::Read>(
+        &mut self,
+        path: &Path,
+        rdr: R,
+    ) -> io::Result<SearchResult> {
+        use self::PatternMatcher::*;
+
+        // Borrow the fields separately so that the matcher can be borrowed
+        // immutably at the same time.
+        let (searcher, printer) = (&mut self.searcher, &mut self.printer);
+        match self.matcher {
+            RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr),
+            #[cfg(feature = "pcre2")]
+            PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr),
+        }
+    }
+}
+
+/// Run a search over the file at `path` with the given matcher and
+/// searcher, sending results to whichever printer is active.
+///
+/// The returned `SearchResult` records whether anything matched together
+/// with the statistics exposed by the printer's sink.
+fn search_path<M: Matcher, W: WriteColor>(
+    matcher: M,
+    searcher: &mut Searcher,
+    printer: &mut Printer<W>,
+    path: &Path,
+) -> io::Result<SearchResult> {
+    match *printer {
+        Printer::Standard(ref mut standard) => {
+            let mut sink = standard.sink_with_path(&matcher, path);
+            searcher.search_path(&matcher, path, &mut sink)?;
+            let has_match = sink.has_match();
+            let stats = sink.stats().cloned();
+            Ok(SearchResult { has_match, stats })
+        }
+        Printer::Summary(ref mut summary) => {
+            let mut sink = summary.sink_with_path(&matcher, path);
+            searcher.search_path(&matcher, path, &mut sink)?;
+            let has_match = sink.has_match();
+            let stats = sink.stats().cloned();
+            Ok(SearchResult { has_match, stats })
+        }
+        Printer::JSON(ref mut json_printer) => {
+            let mut sink = json_printer.sink_with_path(&matcher, path);
+            searcher.search_path(&matcher, path, &mut sink)?;
+            let has_match = sink.has_match();
+            // Unlike the other two sinks, this one hands back its
+            // statistics directly, so they are always present.
+            let stats = Some(sink.stats().clone());
+            Ok(SearchResult { has_match, stats })
+        }
+    }
+}
+
+/// Run a search over the bytes produced by `rdr` with the given matcher and
+/// searcher, sending results to whichever printer is active.
+///
+/// `path` is only handed to the printer's sink; the data that is searched
+/// comes from `rdr`.
+fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
+    matcher: M,
+    searcher: &mut Searcher,
+    printer: &mut Printer<W>,
+    path: &Path,
+    rdr: R,
+) -> io::Result<SearchResult> {
+    match *printer {
+        Printer::Standard(ref mut standard) => {
+            let mut sink = standard.sink_with_path(&matcher, path);
+            searcher.search_reader(&matcher, rdr, &mut sink)?;
+            let has_match = sink.has_match();
+            let stats = sink.stats().cloned();
+            Ok(SearchResult { has_match, stats })
+        }
+        Printer::Summary(ref mut summary) => {
+            let mut sink = summary.sink_with_path(&matcher, path);
+            searcher.search_reader(&matcher, rdr, &mut sink)?;
+            let has_match = sink.has_match();
+            let stats = sink.stats().cloned();
+            Ok(SearchResult { has_match, stats })
+        }
+        Printer::JSON(ref mut json_printer) => {
+            let mut sink = json_printer.sink_with_path(&matcher, path);
+            searcher.search_reader(&matcher, rdr, &mut sink)?;
+            let has_match = sink.has_match();
+            // Unlike the other two sinks, this one hands back its
+            // statistics directly, so they are always present.
+            let stats = Some(sink.stats().clone());
+            Ok(SearchResult { has_match, stats })
+        }
+    }
+}
+
+/// Convert a duration into seconds expressed as a floating point number.
+///
+/// The whole seconds and the sub-second nanoseconds are converted to `f64`
+/// separately and then added together.
+fn fractional_seconds(duration: Duration) -> f64 {
+    let whole = duration.as_secs() as f64;
+    let fraction = duration.subsec_nanos() as f64 * 1e-9;
+    whole + fraction
+}