summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2018-08-29 20:53:52 -0400
committer Andrew Gallant <jamslam@gmail.com> 2018-09-04 23:18:55 -0400
commit 4846d63539690047fa58ec582d94bcba16da1c09 (patch)
tree 61a2cf9de3d62ea6524659893ab9a2c7800c3286 /src
parent 13c47530a6e685d2dee1953a64f055936e6a2ba8 (diff)
grep-cli: introduce new grep-cli crate
This commit moves a lot of "utility" code from ripgrep core into grep-cli. Any one of these things might not be worth creating a new crate, but combining everything together results in a fair number of convenience routines that make up a decent-sized crate. There is potentially more we could move into the crate, but much of what remains in ripgrep core is almost entirely dealing with the number of flags we support. In the course of moving things to the grep-cli crate, we clean up a lot of gunk and improve failure modes in a number of cases. In particular, we've fixed a bug where other processes could deadlock if they write too much to stderr. Fixes #990
Diffstat (limited to 'src')
-rw-r--r-- src/app.rs 8
-rw-r--r-- src/args.rs 149
-rw-r--r-- src/decompressor.rs 190
-rw-r--r-- src/main.rs 10
-rw-r--r-- src/preprocessor.rs 93
-rw-r--r-- src/search.rs 73
-rw-r--r-- src/unescape.rs 137
7 files changed, 99 insertions, 561 deletions
diff --git a/src/app.rs b/src/app.rs
index 059effbd..039b6980 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -80,7 +80,7 @@ pub fn app() -> App<'static, 'static> {
/// Return the "long" format of ripgrep's version string.
///
/// If a revision hash is given, then it is used. If one isn't given, then
-/// the RIPGREP_BUILD_GIT_HASH env var is inspect for it. If that isn't set,
+/// the RIPGREP_BUILD_GIT_HASH env var is inspected for it. If that isn't set,
/// then a revision hash is not included in the version string returned.
pub fn long_version(revision_hash: Option<&str>) -> String {
// Do we have a git hash?
@@ -537,7 +537,11 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
// The positional arguments must be defined first and in order.
arg_pattern(&mut args);
arg_path(&mut args);
- // Flags can be defined in any order, but we do it alphabetically.
+ // Flags can be defined in any order, but we do it alphabetically. Note
+ // that each function may define multiple flags. For example,
+ // `flag_encoding` defines `--encoding` and `--no-encoding`. Most `--no`
+ // flags are hidden and merely mentioned in the docs of the corresponding
+ // "positive" flag.
flag_after_context(&mut args);
flag_before_context(&mut args);
flag_byte_offset(&mut args);
diff --git a/src/args.rs b/src/args.rs
index de84f094..2decefe7 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,14 +1,14 @@
use std::cmp;
use std::env;
use std::ffi::OsStr;
-use std::fs::{self, File};
-use std::io::{self, BufRead};
+use std::fs;
+use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::SystemTime;
-use atty;
use clap;
+use grep::cli;
use grep::matcher::LineTerminator;
#[cfg(feature = "pcre2")]
use grep::pcre2::{
@@ -20,6 +20,7 @@ use grep::printer::{
JSON, JSONBuilder,
Standard, StandardBuilder,
Summary, SummaryBuilder, SummaryKind,
+ default_color_specs,
};
use grep::regex::{
RegexMatcher as RustRegexMatcher,
@@ -34,11 +35,10 @@ use ignore::{Walk, WalkBuilder, WalkParallel};
use log;
use num_cpus;
use path_printer::{PathPrinter, PathPrinterBuilder};
-use regex::{self, Regex};
-use same_file::Handle;
+use regex;
use termcolor::{
WriteColor,
- BufferedStandardStream, BufferWriter, ColorChoice, StandardStream,
+ BufferWriter, ColorChoice,
};
use app;
@@ -47,7 +47,6 @@ use logger::Logger;
use messages::{set_messages, set_ignore_messages};
use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder};
use subject::SubjectBuilder;
-use unescape::{escape, unescape};
use Result;
/// The command that ripgrep should execute based on the command line
@@ -314,13 +313,8 @@ impl Args {
/// Execute the given function with a writer to stdout that enables color
/// support based on the command line configuration.
- pub fn stdout(&self) -> Box<WriteColor + Send> {
- let color_choice = self.matches().color_choice();
- if atty::is(atty::Stream::Stdout) {
- Box::new(StandardStream::stdout(color_choice))
- } else {
- Box::new(BufferedStandardStream::stdout(color_choice))
- }
+ pub fn stdout(&self) -> cli::StandardStream {
+ cli::stdout(self.matches().color_choice())
}
/// Return the type definitions compiled into ripgrep.
@@ -628,8 +622,8 @@ impl ArgMatches {
.caseless(self.case_insensitive())
.multi_line(true)
.word(self.is_present("word-regexp"));
- // For whatever reason, the JIT craps out during compilation with a
- // "no more memory" error on 32 bit systems. So don't use it there.
+ // For whatever reason, the JIT craps out during regex compilation with
+ // a "no more memory" error on 32 bit systems. So don't use it there.
if !cfg!(target_pointer_width = "32") {
builder.jit(true);
}
@@ -638,7 +632,7 @@ impl ArgMatches {
if self.encoding()?.is_some() {
// SAFETY: If an encoding was specified, then we're guaranteed
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
- // (Feeding invalid UTF-8 to PCRE2 is UB.)
+ // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
unsafe {
builder.disable_utf_check();
}
@@ -853,7 +847,7 @@ impl ArgMatches {
} else if preference == "ansi" {
ColorChoice::AlwaysAnsi
} else if preference == "auto" {
- if atty::is(atty::Stream::Stdout) || self.is_present("pretty") {
+ if cli::is_tty_stdout() || self.is_present("pretty") {
ColorChoice::Auto
} else {
ColorChoice::Never
@@ -869,15 +863,7 @@ impl ArgMatches {
/// is returned.
fn color_specs(&self) -> Result<ColorSpecs> {
// Start with a default set of color specs.
- let mut specs = vec![
- #[cfg(unix)]
- "path:fg:magenta".parse().unwrap(),
- #[cfg(windows)]
- "path:fg:cyan".parse().unwrap(),
- "line:fg:green".parse().unwrap(),
- "match:fg:red".parse().unwrap(),
- "match:style:bold".parse().unwrap(),
- ];
+ let mut specs = default_color_specs();
for spec_str in self.values_of_lossy_vec("colors") {
specs.push(spec_str.parse()?);
}
@@ -913,9 +899,9 @@ impl ArgMatches {
///
/// If one was not provided, the default `--` is returned.
fn context_separator(&self) -> Vec<u8> {
- match self.value_of_lossy("context-separator") {
+ match self.value_of_os("context-separator") {
None => b"--".to_vec(),
- Some(sep) => unescape(&sep),
+ Some(sep) => cli::unescape_os(&sep),
}
}
@@ -990,7 +976,7 @@ impl ArgMatches {
if self.is_present("no-heading") || self.is_present("vimgrep") {
false
} else {
- atty::is(atty::Stream::Stdout)
+ cli::is_tty_stdout()
|| self.is_present("heading")
|| self.is_present("pretty")
}
@@ -1042,7 +1028,7 @@ impl ArgMatches {
// generally want to show line numbers by default when printing to a
// tty for human consumption, except for one interesting case: when
// we're only searching stdin. This makes pipelines work as expected.
- (atty::is(atty::Stream::Stdout) && !self.is_only_stdin(paths))
+ (cli::is_tty_stdout() && !self.is_only_stdin(paths))
|| self.is_present("line-number")
|| self.is_present("column")
|| self.is_present("pretty")
@@ -1177,8 +1163,7 @@ impl ArgMatches {
let file_is_stdin = self.values_of_os("file")
.map_or(false, |mut files| files.any(|f| f == "-"));
let search_cwd =
- atty::is(atty::Stream::Stdin)
- || !stdin_is_readable()
+ !cli::is_readable_stdin()
|| (self.is_present("file") && file_is_stdin)
|| self.is_present("files")
|| self.is_present("type-list");
@@ -1194,9 +1179,9 @@ impl ArgMatches {
/// If the provided path separator is more than a single byte, then an
/// error is returned.
fn path_separator(&self) -> Result<Option<u8>> {
- let sep = match self.value_of_lossy("path-separator") {
+ let sep = match self.value_of_os("path-separator") {
None => return Ok(None),
- Some(sep) => unescape(&sep),
+ Some(sep) => cli::unescape_os(&sep),
};
if sep.is_empty() {
Ok(None)
@@ -1207,7 +1192,7 @@ impl ArgMatches {
In some shells on Windows '/' is automatically \
expanded. Use '//' instead.",
sep.len(),
- escape(&sep),
+ cli::escape(&sep),
)))
} else {
Ok(Some(sep[0]))
@@ -1254,18 +1239,12 @@ impl ArgMatches {
}
}
}
- if let Some(files) = self.values_of_os("file") {
- for file in files {
- if file == "-" {
- let stdin = io::stdin();
- for line in stdin.lock().lines() {
- pats.push(self.pattern_from_str(&line?));
- }
+ if let Some(paths) = self.values_of_os("file") {
+ for path in paths {
+ if path == "-" {
+ pats.extend(cli::patterns_from_stdin()?);
} else {
- let f = File::open(file)?;
- for line in io::BufReader::new(f).lines() {
- pats.push(self.pattern_from_str(&line?));
- }
+ pats.extend(cli::patterns_from_path(path)?);
}
}
}
@@ -1287,7 +1266,7 @@ impl ArgMatches {
///
/// If the pattern is not valid UTF-8, then an error is returned.
fn pattern_from_os_str(&self, pat: &OsStr) -> Result<String> {
- let s = pattern_to_str(pat)?;
+ let s = cli::pattern_from_os(pat)?;
Ok(self.pattern_from_str(s))
}
@@ -1495,40 +1474,11 @@ impl ArgMatches {
&self,
arg_name: &str,
) -> Result<Option<u64>> {
- lazy_static! {
- static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap();
- }
-
- let arg_value = match self.value_of_lossy(arg_name) {
- Some(x) => x,
- None => return Ok(None)
- };
- let caps = RE
- .captures(&arg_value)
- .ok_or_else(|| {
- format!("invalid format for {}", arg_name)
- })?;
-
- let value = caps[1].parse::<u64>()?;
- let suffix = caps.get(2).map(|x| x.as_str());
-
- let v_10 = value.checked_mul(1024);
- let v_20 = v_10.and_then(|x| x.checked_mul(1024));
- let v_30 = v_20.and_then(|x| x.checked_mul(1024));
- let try_suffix = |x: Option<u64>| {
- if x.is_some() {
- Ok(x)
- } else {
- Err(From::from(format!("number too large for {}", arg_name)))
- }
+ let size = match self.value_of_lossy(arg_name) {
+ None => return Ok(None),
+ Some(size) => size,
};
- match suffix {
- None => Ok(Some(value)),
- Some("K") => try_suffix(v_10),
- Some("M") => try_suffix(v_20),
- Some("G") => try_suffix(v_30),
- _ => Err(From::from(format!("invalid suffix for {}", arg_name)))
- }
+ Ok(Some(cli::parse_human_readable_size(&size)?))
}
}
@@ -1562,21 +1512,6 @@ impl ArgMatches {
}
}
-/// Convert an OsStr to a Unicode string.
-///
-/// Patterns _must_ be valid UTF-8, so if the given OsStr isn't valid UTF-8,
-/// this returns an error.
-fn pattern_to_str(s: &OsStr) -> Result<&str> {
- s.to_str().ok_or_else(|| {
- From::from(format!(
- "Argument '{}' is not valid UTF-8. \
- Use hex escape sequences to match arbitrary \
- bytes in a pattern (e.g., \\xFF).",
- s.to_string_lossy()
- ))
- })
-}
-
/// Inspect an error resulting from building a Rust regex matcher, and if it's
/// believed to correspond to a syntax error that PCRE2 could handle, then
/// add a message to suggest the use of -P/--pcre2.
@@ -1638,25 +1573,3 @@ where G: Fn(&fs::Metadata) -> io::Result<SystemTime>
t1.cmp(&t2)
}
}
-
-/// Returns true if and only if stdin is deemed searchable.
-#[cfg(unix)]
-fn stdin_is_readable() -> bool {
- use std::os::unix::fs::FileTypeExt;
-
- let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
- Err(_) => return false,
- Ok(md) => md.file_type(),
- };
- ft.is_file() || ft.is_fifo()
-}
-
-/// Returns true if and only if stdin is deemed searchable.
-#[cfg(windows)]
-fn stdin_is_readable() -> bool {
- use winapi_util as winutil;
-
- winutil::file::typ(winutil::HandleRef::stdin())
- .map(|t| t.is_disk() || t.is_pipe())
- .unwrap_or(false)
-}
diff --git a/src/decompressor.rs b/src/decompressor.rs
deleted file mode 100644
index d25c2f56..00000000
--- a/src/decompressor.rs
+++ /dev/null
@@ -1,190 +0,0 @@
-use std::collections::HashMap;
-use std::ffi::OsStr;
-use std::fmt;
-use std::io::{self, Read};
-use std::path::Path;
-use std::process::{self, Stdio};
-
-use globset::{Glob, GlobSet, GlobSetBuilder};
-
-/// A decompression command, contains the command to be spawned as well as any
-/// necessary CLI args.
-#[derive(Clone, Copy, Debug)]
-struct DecompressionCommand {
- cmd: &'static str,
- args: &'static [&'static str],
-}
-
-impl DecompressionCommand {
- /// Create a new decompress command
- fn new(
- cmd: &'static str,
- args: &'static [&'static str],
- ) -> DecompressionCommand {
- DecompressionCommand {
- cmd, args
- }
- }
-}
-
-impl fmt::Display for DecompressionCommand {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{} {}", self.cmd, self.args.join(" "))
- }
-}
-
-lazy_static! {
- static ref DECOMPRESSION_COMMANDS: HashMap<
- &'static str,
- DecompressionCommand,
- > = {
- let mut m = HashMap::new();
-
- const ARGS: &[&str] = &["-d", "-c"];
- m.insert("gz", DecompressionCommand::new("gzip", ARGS));
- m.insert("bz2", DecompressionCommand::new("bzip2", ARGS));
- m.insert("xz", DecompressionCommand::new("xz", ARGS));
- m.insert("lz4", DecompressionCommand::new("lz4", ARGS));
-
- const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"];
- m.insert("lzma", DecompressionCommand::new("xz", LZMA_ARGS));
-
- m
- };
- static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = {
- let mut builder = GlobSetBuilder::new();
- builder.add(Glob::new("*.gz").unwrap());
- builder.add(Glob::new("*.bz2").unwrap());
- builder.add(Glob::new("*.xz").unwrap());
- builder.add(Glob::new("*.lz4").unwrap());
- builder.add(Glob::new("*.lzma").unwrap());
- builder.build().unwrap()
- };
- static ref TAR_ARCHIVE_FORMATS: GlobSet = {
- let mut builder = GlobSetBuilder::new();
- builder.add(Glob::new("*.tar.gz").unwrap());
- builder.add(Glob::new("*.tar.xz").unwrap());
- builder.add(Glob::new("*.tar.bz2").unwrap());
- builder.add(Glob::new("*.tar.lz4").unwrap());
- builder.add(Glob::new("*.tgz").unwrap());
- builder.add(Glob::new("*.txz").unwrap());
- builder.add(Glob::new("*.tbz2").unwrap());
- builder.build().unwrap()
- };
-}
-
-/// DecompressionReader provides an `io::Read` implementation for a limited
-/// set of compression formats.
-#[derive(Debug)]
-pub struct DecompressionReader {
- cmd: DecompressionCommand,
- child: process::Child,
- done: bool,
-}
-
-impl DecompressionReader {
- /// Returns a handle to the stdout of the spawned decompression process for
- /// `path`, which can be directly searched in the worker. When the returned
- /// value is exhausted, the underlying process is reaped. If the underlying
- /// process fails, then its stderr is read and converted into a normal
- /// io::Error.
- ///
- /// If there is any error in spawning the decompression command, then
- /// return `None`, after outputting any necessary debug or error messages.
- pub fn from_path(path: &Path) -> Option<DecompressionReader> {
- let extension = match path.extension().and_then(OsStr::to_str) {
- Some(extension) => extension,
- None => {
- debug!(
- "{}: failed to get compresson extension", path.display());
- return None;
- }
- };
- let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) {
- Some(cmd) => cmd,
- None => {
- debug!(
- "{}: failed to get decompression command", path.display());
- return None;
- }
- };
- let cmd = process::Command::new(decompression_cmd.cmd)
- .args(decompression_cmd.args)
- .arg(path)
- .stdout(Stdio::piped())
- .stderr(Stdio::piped())
- .spawn();
- let child = match cmd {
- Ok(process) => process,
- Err(_) => {
- debug!(
- "{}: decompression command '{}' not found",
- path.display(), decompression_cmd.cmd);
- return None;
- }
- };
- Some(DecompressionReader::new(*decompression_cmd, child))
- }
-
- fn new(
- cmd: DecompressionCommand,
- child: process::Child,
- ) -> DecompressionReader {
- DecompressionReader {
- cmd: cmd,
- child: child,
- done: false,
- }
- }
-
- fn read_error(&mut self) -> io::Result<io::Error> {
- let mut errbytes = vec![];
- self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
- let errstr = String::from_utf8_lossy(&errbytes);
- let errstr = errstr.trim();
-
- Ok(if errstr.is_empty() {
- let msg = format!("decompression command failed: '{}'", self.cmd);
- io::Error::new(io::ErrorKind::Other, msg)
- } else {
- let msg = format!(
- "decompression command '{}' failed: {}", self.cmd, errstr);
- io::Error::new(io::ErrorKind::Other, msg)
- })
- }
-}
-
-impl io::Read for DecompressionReader {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- if self.done {
- return Ok(0);
- }
- let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
- if nread == 0 {
- self.done = true;
- // Reap the child now that we're done reading.
- // If the command failed, report stderr as an error.
- if !self.child.wait()?.success() {
- return Err(self.read_error()?);
- }
- }
- Ok(nread)
- }
-}
-
-/// Returns true if the given path contains a supported compression format or
-/// is a TAR archive.
-pub fn is_compressed(path: &Path) -> bool {
- is_supported_compression_format(path) || is_tar_archive(path)
-}
-
-/// Returns true if the given path matches any one of the supported compression
-/// formats
-fn is_supported_compression_format(path: &Path) -> bool {
- SUPPORTED_COMPRESSION_FORMATS.is_match(path)
-}
-
-/// Returns true if the given path matches any of the known TAR file formats.
-fn is_tar_archive(path: &Path) -> bool {
- TAR_ARCHIVE_FORMATS.is_match(path)
-}
diff --git a/src/main.rs b/src/main.rs
index 4a4ac5f0..bcadc8a1 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,5 @@
-extern crate atty;
#[macro_use]
extern crate clap;
-extern crate globset;
extern crate grep;
extern crate ignore;
#[macro_use]
@@ -10,14 +8,11 @@ extern crate lazy_static;
extern crate log;
extern crate num_cpus;
extern crate regex;
-extern crate same_file;
#[macro_use]
extern crate serde_json;
extern crate termcolor;
-#[cfg(windows)]
-extern crate winapi_util;
-use std::io;
+use std::io::{self, Write};
use std::process;
use std::sync::{Arc, Mutex};
use std::time::Instant;
@@ -33,13 +28,10 @@ mod messages;
mod app;
mod args;
mod config;
-mod decompressor;
-mod preprocessor;
mod logger;
mod path_printer;
mod search;
mod subject;
-mod unescape;
type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
diff --git a/src/preprocessor.rs b/src/preprocessor.rs
deleted file mode 100644
index 07f66e2d..00000000
--- a/src/preprocessor.rs
+++ /dev/null
@@ -1,93 +0,0 @@
-use std::fs::File;
-use std::io::{self, Read};
-use std::path::{Path, PathBuf};
-use std::process::{self, Stdio};
-
-/// PreprocessorReader provides an `io::Read` impl to read kids output.
-#[derive(Debug)]
-pub struct PreprocessorReader {
- cmd: PathBuf,
- path: PathBuf,
- child: process::Child,
- done: bool,
-}
-
-impl PreprocessorReader {
- /// Returns a handle to the stdout of the spawned preprocessor process for
- /// `path`, which can be directly searched in the worker. When the returned
- /// value is exhausted, the underlying process is reaped. If the underlying
- /// process fails, then its stderr is read and converted into a normal
- /// io::Error.
- ///
- /// If there is any error in spawning the preprocessor command, then
- /// return the corresponding error.
- pub fn from_cmd_path(
- cmd: PathBuf,
- path: &Path,
- ) -> io::Result<PreprocessorReader> {
- let child = process::Command::new(&cmd)
- .arg(path)
- .stdin(Stdio::from(File::open(path)?))
- .stdout(Stdio::piped())
- .stderr(Stdio::piped())
- .spawn()
- .map_err(|err| {
- io::Error::new(
- io::ErrorKind::Other,
- format!(
- "error running preprocessor command '{}': {}",
- cmd.display(),
- err,
- ),
- )
- })?;
- Ok(PreprocessorReader {
- cmd: cmd,
- path: path.to_path_buf(),
- child: child,
- done: false,
- })
- }
-
- fn read_error(&mut self) -> io::Result<io::Error> {
- let mut errbytes = vec![];
- self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
- let errstr = String::from_utf8_lossy(&errbytes);
- let errstr = errstr.trim();
-
- Ok(if errstr.is_empty() {
- let msg = format!(
- "preprocessor command failed: '{} {}'",
- self.cmd.display(),
- self.path.display(),
- );
- io::Error::new(io::ErrorKind::Other, msg)
- } else {
- let msg = format!(
- "preprocessor command failed: '{} {}': {}",
- self.cmd.display(),
- self.path.display(),
- errstr,
- );
- io::Error::new(io::ErrorKind::Other, msg)
- })
- }
-}
-
-impl io::Read for PreprocessorReader {
- fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
- if self.done {
- return Ok(0);
- }
- let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
- if nread == 0 {
- self.done = true;
- // Reap the child now that we're done reading.
- // If the command failed, report stderr as an error.
- if !self.child.wait()?.success() {
- return Err(self.read_error()?);
- }
- }
- Ok(nread)
- }
-}
diff --git a/src/search.rs b/src/search.rs
index 45f7cf87..457f8f7a 100644
--- a/src/search.rs
+++ b/src/search.rs
@@ -1,7 +1,10 @@
+use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
+use std::process::{Command, Stdio};
use std::time::Duration;
+use grep::cli;
use grep::matcher::Matcher;
#[cfg(feature = "pcre2")]
use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
@@ -11,8 +14,6 @@ use grep::searcher::Searcher;
use serde_json as json;
use termcolor::WriteColor;
-use decompressor::{DecompressionReader, is_compressed};
-use preprocessor::PreprocessorReader;
use subject::Subject;
/// The configuration for the search worker. Among a few other things, the
@@ -39,6 +40,8 @@ impl Default for Config {
#[derive(Clone, Debug)]
pub struct SearchWorkerBuilder {
config: Config,
+ command_builder: cli::CommandReaderBuilder,
+ decomp_builder: cli::DecompressionReaderBuilder,
}
impl Default for SearchWorkerBuilder {
@@ -50,7 +53,17 @@ impl Default for SearchWorkerBuilder {
impl SearchWorkerBuilder {
/// Create a new builder for configuring and constructing a search worker.
pub fn new() -> SearchWorkerBuilder {
- SearchWorkerBuilder { config: Config::default() }
+ let mut cmd_builder = cli::CommandReaderBuilder::new();
+ cmd_builder.async_stderr(true);
+
+ let mut decomp_builder = cli::DecompressionReaderBuilder::new();
+ decomp_builder.async_stderr(true);
+
+ SearchWorkerBuilder {
+ config: Config::default(),
+ command_builder: cmd_builder,
+ decomp_builder: decomp_builder,
+ }
}
/// Create a new search worker using the given searcher, matcher and
@@ -62,7 +75,12 @@ impl SearchWorkerBuilder {
printer: Printer<W>,
) -> SearchWorker<W> {
let config = self.config.clone();
- SearchWorker { config, matcher, searcher, printer }
+ let command_builder = self.command_builder.clone();
+ let decomp_builder = self.decomp_builder.clone();
+ SearchWorker {
+ config, command_builder, decomp_builder,
+ matcher, searcher, printer,
+ }
}
/// Forcefully use JSON to emit statistics, even if the underlying printer
@@ -237,6 +255,8 @@ impl<W: WriteColor> Printer<W> {
#[derive(Debug)]
pub struct SearchWorker<W> {
config: Config,
+ command_builder: cli::CommandReaderBuilder,
+ decomp_builder: cli::DecompressionReaderBuilder,
matcher: PatternMatcher,
searcher: Searcher,
printer: Printer<W>,
@@ -279,19 +299,48 @@ impl<W: WriteColor> SearchWorker<W> {
// A `return` here appeases the borrow checker. NLL will fix this.
return self.search_reader(path, stdin.lock());
} else if self.config.preprocessor.is_some() {
- let cmd = self.config.preprocessor.clone().unwrap();
- let rdr = PreprocessorReader::from_cmd_path(cmd, path)?;
- self.search_reader(path, rdr)
- } else if self.config.search_zip && is_compressed(path) {
- match DecompressionReader::from_path(path) {
- None => Ok(SearchResult::default()),
- Some(rdr) => self.search_reader(path, rdr),
- }
+ self.search_preprocessor(path)
+ } else if self.should_decompress(path) {
+ self.search_decompress(path)
} else {
self.search_path(path)
}
}
+ /// Returns true if and only if the given file path should be
+ /// decompressed before searching.
+ fn should_decompress(&self, path: &Path) -> bool {
+ if !self.config.search_zip {
+ return false;
+ }
+ self.decomp_builder.get_matcher().has_command(path)
+ }
+
+ fn search_preprocessor(
+ &mut self,
+ path: &Path,
+ ) -> io::Result<SearchResult> {
+ let bin = self.config.preprocessor.clone().unwrap();
+ let mut cmd = Command::new(&bin);
+ cmd.arg(path).stdin(Stdio::from(File::open(path)?));
+
+ let rdr = self.command_builder.build(&mut cmd)?;
+ self.search_reader(path, rdr).map_err(|err| {
+ io::Error::new(
+ io::ErrorKind::Other,
+ format!("preprocessor command failed: '{:?}': {}", cmd, err),
+ )
+ })
+ }
+
+ fn search_decompress(
+ &mut self,
+ path: &Path,
+ ) -> io::Result<SearchResult> {
+ let rdr = self.decomp_builder.build(path)?;
+ self.search_reader(path, rdr)
+ }
+
/// Search the contents of the given file path.
fn search_path(&mut self, path: &Path) -> io::Result<SearchResult> {
use self::PatternMatcher::*;
diff --git a/src/unescape.rs b/src/unescape.rs
deleted file mode 100644
index 0c7f1c8d..00000000
--- a/src/unescape.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-/// A single state in the state machine used by `unescape`.
-#[derive(Clone, Copy, Eq, PartialEq)]
-enum State {
- /// The state after seeing a `\`.
- Escape,
- /// The state after seeing a `\x`.
- HexFirst,
- /// The state after seeing a `\x[0-9A-Fa-f]`.
- HexSecond(char),
- /// Default state.
- Literal,
-}
-
-/// Escapes an arbitrary byte slice such that it can be presented as a human
-/// readable string.
-pub fn escape(bytes: &[u8]) -> String {
- use std::ascii::escape_default;
-
- let escaped = bytes.iter().flat_map(|&b| escape_default(b)).collect();
- String::from_utf8(escaped).unwrap()
-}
-
-/// Unescapes a string given on the command line. It supports a limited set of
-/// escape sequences:
-///
-/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
-/// * `\xZZ` hexadecimal escapes are mapped to their byte.
-pub fn unescape(s: &str) -> Vec<u8> {
- use self::State::*;
-
- let mut bytes = vec![];
- let mut state = Literal;
- for c in s.chars() {
- match state {
- Escape => {
- match c {
- 'n' => { bytes.push(b'\n'); state = Literal; }
- 'r' => { bytes.push(b'\r'); state = Literal; }
- 't' => { bytes.push(b'\t'); state = Literal; }
- 'x' => { state = HexFirst; }
- c => {
- bytes.extend(format!(r"\{}", c).into_bytes());
- state = Literal;
- }
- }
- }
- HexFirst => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- state = HexSecond(c);
- }
- c => {
- bytes.extend(format!(r"\x{}", c).into_bytes());
- state = Literal;
- }
- }
- }
- HexSecond(first) => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- let ordinal = format!("{}{}", first, c);
- let byte = u8::from_str_radix(&ordinal, 16).unwrap();
- bytes.push(byte);
- state = Literal;
- }
- c => {
- let original = format!(r"\x{}{}", first, c);
- bytes.extend(original.into_bytes());
- state = Literal;
- }
- }
- }
- Literal => {
- match c {
- '\\' => { state = Escape; }
- c => { bytes.extend(c.to_string().as_bytes()); }
- }
- }
- }
- }
- match state {
- Escape => bytes.push(b'\\'),
- HexFirst => bytes.extend(b"\\x"),
- HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
- Literal => {}
- }
- bytes
-}
-
-#[cfg(test)]
-mod tests {
- use super::unescape;
-
- fn b(bytes: &'static [u8]) -> Vec<u8> {
- bytes.to_vec()
- }
-
- #[test]
- fn unescape_nul() {
- assert_eq!(b(b"\x00"), unescape(r"\x00"));
- }
-
- #[test]
- fn unescape_nl() {
- assert_eq!(b(b"\n"), unescape(r"\n"));
- }
-
- #[test]
- fn unescape_tab() {
- assert_eq!(b(b"\t"), unescape(r"\t"));
- }
-
- #[test]
- fn unescape_carriage() {
- assert_eq!(b(b"\r"), unescape(r"\r"));
- }
-
- #[test]
- fn unescape_nothing_simple() {
- assert_eq!(b(b"\\a"), unescape(r"\a"));
- }
-
- #[test]
- fn unescape_nothing_hex0() {
- assert_eq!(b(b"\\x"), unescape(r"\x"));
- }
-
- #[test]
- fn unescape_nothing_hex1() {
- assert_eq!(b(b"\\xz"), unescape(r"\xz"));
- }
-
- #[test]
- fn unescape_nothing_hex2() {
- assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
- }
-}