summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorCharles Blake <cb@cblake.net>2018-07-13 09:54:51 -0400
committerAndrew Gallant <jamslam@gmail.com>2018-07-21 17:25:12 -0400
commit231456c409ff38c75c39d01b781b569965ddf808 (patch)
tree592b89acd3172b0c23acdfe6113bcd70ea11aa3a /src
parent1d09d4d31ba3ac2eb09edf31e8ec46b2b5cec388 (diff)
ripgrep: add --pre flag
The preprocessor flag accepts a command program and executes this program for every input file that is searched. Rather than searching the file directly, ripgrep searches the stdout contents of the program. Closes #978, Closes #981
Diffstat (limited to 'src')
-rw-r--r--src/app.rs55
-rw-r--r--src/args.rs16
-rw-r--r--src/main.rs1
-rw-r--r--src/preprocessor.rs92
-rw-r--r--src/worker.rs24
5 files changed, 184 insertions, 4 deletions
diff --git a/src/app.rs b/src/app.rs
index a0fdf946..67b7295e 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -534,6 +534,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_only_matching(&mut args);
flag_path_separator(&mut args);
flag_passthru(&mut args);
+ flag_pre(&mut args);
flag_pretty(&mut args);
flag_quiet(&mut args);
flag_regex_size_limit(&mut args);
@@ -1453,12 +1454,62 @@ This flag can be disabled with --no-search-zip.
");
let arg = RGArg::switch("search-zip").short("z")
.help(SHORT).long_help(LONG)
- .overrides("no-search-zip");
+ .overrides("no-search-zip")
+ .overrides("pre");
args.push(arg);
let arg = RGArg::switch("no-search-zip")
.hidden()
- .overrides("search-zip");
+ .overrides("search-zip")
+ .overrides("pre");
+ args.push(arg);
+}
+
+/// Registers the `--pre COMMAND` flag by pushing its definition onto `args`.
+///
+/// `SHORT` is the one-line help and `LONG` the long help text. The flag takes
+/// a single value named COMMAND.
+fn flag_pre(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
+ const LONG: &str = long!("\
+For each input FILE, search the standard output of COMMAND FILE rather than the
+contents of FILE. This option expects the COMMAND program to either be an
+absolute path or to be available in your PATH. An empty string COMMAND
+deactivates this feature.
+
+A preprocessor is not run when ripgrep is searching stdin.
+
+When searching over sets of files that may require one of several decoders
+as preprocessors, COMMAND should be a wrapper program or script which first
+classifies FILE based on magic numbers/content or based on the FILE name and
+then dispatches to an appropriate preprocessor. Each COMMAND also has its
+standard input connected to FILE for convenience.
+
+For example, a shell script for COMMAND might look like:
+
+ case \"$1\" in
+ *.pdf)
+ exec pdftotext \"$1\" -
+ ;;
+ *)
+ case $(file \"$1\") in
+ *Zstandard*)
+ exec pzstd -cdq
+ ;;
+ *)
+ exec cat
+ ;;
+ esac
+ ;;
+ esac
+
+The above script uses `pdftotext` to convert a PDF file to plain text. For
+all other files, the script uses the `file` utility to sniff the type of the
+file based on its contents. If it is a compressed file in the Zstandard format,
+then `pzstd` is used to decompress the contents to stdout.
+
+This overrides the -z/--search-zip flag.
+");
+ // `--pre` overrides both zip-search switches, mirroring the
+ // `.overrides("pre")` declared on `search-zip` and `no-search-zip`.
+ let arg = RGArg::flag("pre", "COMMAND")
+ .help(SHORT).long_help(LONG)
+ .overrides("search-zip")
+ .overrides("no-search-zip");
args.push(arg);
}
diff --git a/src/args.rs b/src/args.rs
index aca9bcd5..302e330e 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -80,6 +80,7 @@ pub struct Args {
types: Types,
with_filename: bool,
search_zip_files: bool,
+ preprocessor: Option<PathBuf>,
stats: bool
}
@@ -288,6 +289,7 @@ impl Args {
.quiet(self.quiet)
.text(self.text)
.search_zip_files(self.search_zip_files)
+ .preprocessor(self.preprocessor.clone())
.build()
}
@@ -429,6 +431,7 @@ impl<'a> ArgMatches<'a> {
types: self.types()?,
with_filename: with_filename,
search_zip_files: self.is_present("search-zip"),
+ preprocessor: self.preprocessor(),
stats: self.stats()
};
if args.mmap {
@@ -722,6 +725,19 @@ impl<'a> ArgMatches<'a> {
}
}
+ /// Returns the preprocessor command given via `--pre`, if any.
+ ///
+ /// Both a missing flag and an empty value yield `None`.
+ fn preprocessor(&self) -> Option<PathBuf> {
+     match self.value_of_os("pre") {
+         Some(cmd) if !cmd.is_empty() => Some(Path::new(cmd).to_path_buf()),
+         _ => None,
+     }
+ }
+
/// Returns the unescaped path separator in UTF-8 bytes.
fn path_separator(&self) -> Result<Option<u8>> {
match self.value_of_lossy("path-separator") {
diff --git a/src/main.rs b/src/main.rs
index 6f010135..ab0e4118 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -43,6 +43,7 @@ mod args;
mod config;
mod decoder;
mod decompressor;
+mod preprocessor;
mod logger;
mod pathutil;
mod printer;
diff --git a/src/preprocessor.rs b/src/preprocessor.rs
new file mode 100644
index 00000000..bb464f86
--- /dev/null
+++ b/src/preprocessor.rs
@@ -0,0 +1,92 @@
+use std::fs::File;
+use std::io::{self, Read};
+use std::path::{Path, PathBuf};
+use std::process::{self, Stdio};
+use std::thread;
+
+use Result;
+
+/// PreprocessorReader provides an `io::Read` impl over the standard output of
+/// a spawned preprocessor command.
+///
+/// The child's stderr is drained on a background thread while stdout is being
+/// read. If stderr were only read after stdout reached EOF, a child writing
+/// more to stderr than the pipe can hold would block forever, and so would
+/// the reader waiting on its stdout.
+#[derive(Debug)]
+pub struct PreprocessorReader {
+    /// The preprocessor command; kept for error messages.
+    cmd: PathBuf,
+    /// The file the preprocessor was run on; kept for error messages.
+    path: PathBuf,
+    /// The spawned preprocessor process.
+    child: process::Child,
+    /// Thread collecting everything the child writes to stderr. Becomes
+    /// `None` once its result has been consumed.
+    stderr: Option<thread::JoinHandle<io::Result<Vec<u8>>>>,
+    /// Set once the child's stdout has reported EOF; later reads return
+    /// `Ok(0)` without touching the child again.
+    done: bool,
+}
+
+impl PreprocessorReader {
+    /// Returns a handle to the stdout of the spawned preprocessor process for
+    /// `path`, which can be directly searched in the worker. When the returned
+    /// value is exhausted, the underlying process is reaped. If the underlying
+    /// process fails, then its stderr is converted into a normal io::Error.
+    ///
+    /// The command is run as `cmd path` with its stdin connected to the file
+    /// at `path`.
+    ///
+    /// If `path` cannot be opened or there is any error in spawning the
+    /// preprocessor command, then return the corresponding error.
+    pub fn from_cmd_path(
+        cmd: PathBuf,
+        path: &Path,
+    ) -> Result<PreprocessorReader> {
+        let mut child = process::Command::new(&cmd)
+            .arg(path)
+            .stdin(Stdio::from(File::open(path)?))
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn()
+            .map_err(|err| {
+                format!(
+                    "error running preprocessor command '{}': {}",
+                    cmd.display(),
+                    err,
+                )
+            })?;
+        // Start draining stderr right away so the child can never block on a
+        // full stderr pipe while we are still reading its stdout.
+        let stderr = child.stderr.take().map(|mut pipe| {
+            thread::spawn(move || -> io::Result<Vec<u8>> {
+                let mut bytes = vec![];
+                pipe.read_to_end(&mut bytes)?;
+                Ok(bytes)
+            })
+        });
+        Ok(PreprocessorReader {
+            cmd: cmd,
+            path: path.to_path_buf(),
+            child: child,
+            stderr: stderr,
+            done: false,
+        })
+    }
+
+    /// Builds the error reported when the preprocessor exits unsuccessfully.
+    ///
+    /// The message names the command and path and, when the child wrote
+    /// anything to stderr, appends that output (lossily decoded as UTF-8 and
+    /// trimmed). The outer `Err` is returned if collecting stderr itself
+    /// failed.
+    fn read_error(&mut self) -> io::Result<io::Error> {
+        let errbytes = match self.stderr.take() {
+            // Already consumed (or never captured): nothing extra to report.
+            None => vec![],
+            Some(handle) => match handle.join() {
+                Ok(result) => result?,
+                Err(_) => {
+                    return Err(io::Error::new(
+                        io::ErrorKind::Other,
+                        "preprocessor stderr reader thread panicked",
+                    ));
+                }
+            },
+        };
+        let errstr = String::from_utf8_lossy(&errbytes);
+        let errstr = errstr.trim();
+
+        let msg = if errstr.is_empty() {
+            format!(
+                "preprocessor command failed: '{} {}'",
+                self.cmd.display(),
+                self.path.display(),
+            )
+        } else {
+            format!(
+                "preprocessor command failed: '{} {}': {}",
+                self.cmd.display(),
+                self.path.display(),
+                errstr,
+            )
+        };
+        Ok(io::Error::new(io::ErrorKind::Other, msg))
+    }
+}
+
+impl io::Read for PreprocessorReader {
+    /// Reads from the child's stdout. On EOF the child is reaped, and a
+    /// non-successful exit status is turned into an error carrying the
+    /// child's stderr output.
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        // A zero-length buffer always yields 0 bytes; that must not be
+        // mistaken for EOF, or we would wait on a child that may still be
+        // blocked writing to stdout.
+        if self.done || buf.is_empty() {
+            return Ok(0);
+        }
+        let nread = self.child.stdout
+            .as_mut()
+            .expect("preprocessor stdout is always piped")
+            .read(buf)?;
+        if nread == 0 {
+            self.done = true;
+            // Reap the child now that we're done reading.
+            // If the command failed, report stderr as an error.
+            if !self.child.wait()?.success() {
+                return Err(self.read_error()?);
+            }
+        }
+        Ok(nread)
+    }
+}
diff --git a/src/worker.rs b/src/worker.rs
index a8327cda..5b7ef0a4 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -1,6 +1,6 @@
use std::fs::File;
use std::io;
-use std::path::Path;
+use std::path::{Path, PathBuf};
use encoding_rs::Encoding;
use grep::Grep;
@@ -10,6 +10,7 @@ use termcolor::WriteColor;
use decoder::DecodeReader;
use decompressor::{self, DecompressionReader};
+use preprocessor::PreprocessorReader;
use pathutil::strip_prefix;
use printer::Printer;
use search_buffer::BufferSearcher;
@@ -45,6 +46,7 @@ struct Options {
no_messages: bool,
quiet: bool,
text: bool,
+ preprocessor: Option<PathBuf>,
search_zip_files: bool
}
@@ -68,6 +70,7 @@ impl Default for Options {
quiet: false,
text: false,
search_zip_files: false,
+ preprocessor: None,
}
}
}
@@ -222,6 +225,12 @@ impl WorkerBuilder {
self.opts.search_zip_files = yes;
self
}
+
+ /// Sets the preprocessor command for the worker being built.
+ ///
+ /// With `Some(command)`, each file is searched through the output of the
+ /// preprocessor run on it; `None` leaves preprocessing disabled.
+ pub fn preprocessor(mut self, command: Option<PathBuf>) -> Self {
+     {
+         // Scoped so the mutable borrow ends before `self` is moved out.
+         let opts = &mut self.opts;
+         opts.preprocessor = command;
+     }
+     self
+ }
}
/// Worker is responsible for executing searches on file paths, while choosing
@@ -250,7 +259,18 @@ impl Worker {
}
Work::DirEntry(dent) => {
let mut path = dent.path();
- if self.opts.search_zip_files
+ if self.opts.preprocessor.is_some() {
+ let cmd = self.opts.preprocessor.clone().unwrap();
+ match PreprocessorReader::from_cmd_path(cmd, path) {
+ Ok(reader) => self.search(printer, path, reader),
+ Err(err) => {
+ if !self.opts.no_messages {
+ eprintln!("{}", err);
+ }
+ return 0;
+ }
+ }
+ } else if self.opts.search_zip_files
&& decompressor::is_compressed(path)
{
match DecompressionReader::from_path(path) {