grep-cli: introduce new grep-cli crate

This commit moves a lot of "utility" code from ripgrep core into grep-cli. Any one of these things might not be worth creating a new crate, but combining everything together results in a fair number of a convenience routines that make up a decent sized crate. There is potentially more we could move into the crate, but much of what remains in ripgrep core is almost entirely dealing with the number of flags we support. In the course of doing moving things to the grep-cli crate, we clean up a lot of gunk and improve failure modes in a number of cases. In particular, we've fixed a bug where other processes could deadlock if they write too much to stderr. Fixes #990
author: Andrew Gallant <jamslam@gmail.com> 2018-08-29 20:53:52 -0400
committer: Andrew Gallant <jamslam@gmail.com> 2018-09-04 23:18:55 -0400
commit: 4846d63539690047fa58ec582d94bcba16da1c09 (patch)
tree: 61a2cf9de3d62ea6524659893ab9a2c7800c3286
parent: 13c47530a6e685d2dee1953a64f055936e6a2ba8 (diff)
26 files changed, 1987 insertions, 598 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c0fb04b9..73406762 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -71,6 +71,8 @@ Bug fixes:
   Context from the `--passthru` flag should not impact process exit status.
 * [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984):
   Fixes bug in `ignore` crate where first path was always treated as a symlink.
+* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990):
+  Read stderr asynchronously when running a process.
 * [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013):
   Add compile time and runtime CPU features to `--version` output.
 * [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028):
diff --git a/Cargo.lock b/Cargo.lock
index 7ddb3f2b..b1ee1723 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -168,6 +168,7 @@ name = "grep"
 version = "0.2.0"
 dependencies = [
  "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "grep-cli 0.1.0",
  "grep-matcher 0.1.0",
  "grep-pcre2 0.1.0",
  "grep-printer 0.1.0",
@@ -178,6 +179,20 @@ dependencies = [
 ]
 
 [[package]]
+name = "grep-cli"
+version = "0.1.0"
+dependencies = [
+ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "globset 0.4.1",
+ "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "grep-matcher"
 version = "0.1.0"
 dependencies = [
@@ -464,21 +479,17 @@ dependencies = [
 name = "ripgrep"
 version = "0.9.0"
 dependencies = [
- "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
  "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "globset 0.4.1",
  "grep 0.2.0",
  "ignore 0.4.3",
  "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
  "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 3ff769c6..0c489c46 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ path = "tests/tests.rs"
 members = [
   "globset",
   "grep",
+  "grep-cli",
   "grep-matcher",
   "grep-pcre2",
   "grep-printer",
@@ -44,15 +45,12 @@ members = [
 ]
 
 [dependencies]
-atty = "0.2.11"
-globset = { version = "0.4.0", path = "globset" }
 grep = { version = "0.2.0", path = "grep" }
 ignore = { version = "0.4.0", path = "ignore" }
 lazy_static = "1"
 log = "0.4"
 num_cpus = "1"
 regex = "1"
-same-file = "1"
 serde_json = "1"
 termcolor = "1"
 
@@ -61,9 +59,6 @@ version = "2.32.0"
 default-features = false
 features = ["suggestions"]
 
-[target.'cfg(windows)'.dependencies.winapi-util]
-version = "0.1.1"
-
 [build-dependencies]
 lazy_static = "1"
 
diff --git a/grep-cli/Cargo.toml b/grep-cli/Cargo.toml
new file mode 100644
index 00000000..1d5fda22
--- /dev/null
+++ b/grep-cli/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "grep-cli"
+version = "0.1.0"  #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+Utilities for search oriented command line applications.
+"""
+documentation = "https://docs.rs/grep-cli"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
+readme = "README.md"
+keywords = ["regex", "grep", "cli", "utility", "util"]
+license = "Unlicense/MIT"
+
+[dependencies]
+atty = "0.2.11"
+globset = { version = "0.4.1", path = "../globset" }
+lazy_static = "1.1"
+log = "0.4"
+regex = "1"
+same-file = "1"
+termcolor = "1"
+
+[target.'cfg(windows)'.dependencies.winapi-util]
+version = "0.1.1"
diff --git a/grep-cli/LICENSE-MIT b/grep-cli/LICENSE-MIT
new file mode 100644
index 00000000..3b0a5dc0
--- /dev/null
+++ b/grep-cli/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/grep-cli/README.md b/grep-cli/README.md
new file mode 100644
index 00000000..e78317c6
--- /dev/null
+++ b/grep-cli/README.md
@@ -0,0 +1,38 @@
+grep-cli
+--------
+A utility library that provides common routines desired in search oriented
+command line applications. This includes, but is not limited to, parsing hex
+escapes, detecting whether stdin is readable and more. To the extent possible,
+this crate strives for compatibility across Windows, macOS and Linux.
+
+[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
+[![](https://img.shields.io/crates/v/grep-cli.svg)](https://crates.io/crates/grep-cli)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+
+### Documentation
+
+[https://docs.rs/grep-cli](https://docs.rs/grep-cli)
+
+**NOTE:** You probably don't want to use this crate directly. Instead, you
+should prefer the facade defined in the
+[`grep`](https://docs.rs/grep)
+crate.
+
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+grep-cli = "0.1"
+```
+
+and this to your crate root:
+
+```rust
+extern crate grep_cli;
+```
diff --git a/grep-cli/UNLICENSE b/grep-cli/UNLICENSE
new file mode 100644
index 00000000..68a49daa
--- /dev/null
+++ b/grep-cli/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/grep-cli/src/decompress.rs b/grep-cli/src/decompress.rs
new file mode 100644
index 00000000..ad108ea0
--- /dev/null
+++ b/grep-cli/src/decompress.rs
@@ -0,0 +1,381 @@
+use std::ffi::{OsStr, OsString};
+use std::fs::File;
+use std::io;
+use std::path::Path;
+use std::process::Command;
+
+use globset::{Glob, GlobSet, GlobSetBuilder};
+
+use process::{CommandError, CommandReader, CommandReaderBuilder};
+
+/// A builder for a matcher that determines which files get decompressed.
+#[derive(Clone, Debug)]
+pub struct DecompressionMatcherBuilder {
+    /// The commands for each matching glob.
+    commands: Vec<DecompressionCommand>,
+    /// Whether to include the default matching rules.
+    defaults: bool,
+}
+
+/// A representation of a single command for decompressing data
+/// out-of-proccess.
+#[derive(Clone, Debug)]
+struct DecompressionCommand {
+    /// The glob that matches this command.
+    glob: String,
+    /// The command or binary name.
+    bin: OsString,
+    /// The arguments to invoke with the command.
+    args: Vec<OsString>,
+}
+
+impl Default for DecompressionMatcherBuilder {
+    fn default() -> DecompressionMatcherBuilder {
+        DecompressionMatcherBuilder::new()
+    }
+}
+
+impl DecompressionMatcherBuilder {
+    /// Create a new builder for configuring a decompression matcher.
+    pub fn new() -> DecompressionMatcherBuilder {
+        DecompressionMatcherBuilder {
+            commands: vec![],
+            defaults: true,
+        }
+    }
+
+    /// Build a matcher for determining how to decompress files.
+    ///
+    /// If there was a problem compiling the matcher, then an error is
+    /// returned.
+    pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
+        let defaults =
+            if !self.defaults {
+                vec![]
+            } else {
+                default_decompression_commands()
+            };
+        let mut glob_builder = GlobSetBuilder::new();
+        let mut commands = vec![];
+        for decomp_cmd in defaults.iter().chain(&self.commands) {
+            let glob = Glob::new(&decomp_cmd.glob).map_err(|err| {
+                CommandError::io(io::Error::new(io::ErrorKind::Other, err))
+            })?;
+            glob_builder.add(glob);
+            commands.push(decomp_cmd.clone());
+        }
+        let globs = glob_builder.build().map_err(|err| {
+            CommandError::io(io::Error::new(io::ErrorKind::Other, err))
+        })?;
+        Ok(DecompressionMatcher { globs, commands })
+    }
+
+    /// When enabled, the default matching rules will be compiled into this
+    /// matcher before any other associations. When disabled, only the
+    /// rules explicitly given to this builder will be used.
+    ///
+    /// This is enabled by default.
+    pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder {
+        self.defaults = yes;
+        self
+    }
+
+    /// Associates a glob with a command to decompress files matching the glob.
+    ///
+    /// If multiple globs match the same file, then the most recently added
+    /// glob takes precedence.
+    ///
+    /// The syntax for the glob is documented in the
+    /// [`globset` crate](https://docs.rs/globset/#syntax).
+    pub fn associate<P, I, A>(
+        &mut self,
+        glob: &str,
+        program: P,
+        args: I,
+    ) -> &mut DecompressionMatcherBuilder
+    where P: AsRef<OsStr>,
+          I: IntoIterator<Item=A>,
+          A: AsRef<OsStr>,
+    {
+
+        let glob = glob.to_string();
+        let bin = program.as_ref().to_os_string();
+        let args = args
+            .into_iter()
+            .map(|a| a.as_ref().to_os_string())
+            .collect();
+        self.commands.push(DecompressionCommand { glob, bin, args });
+        self
+    }
+}
+
+/// A matcher for determining how to decompress files.
+#[derive(Clone, Debug)]
+pub struct DecompressionMatcher {
+    /// The set of globs to match. Each glob has a corresponding entry in
+    /// `commands`. When a glob matches, the corresponding command should be
+    /// used to perform out-of-process decompression.
+    globs: GlobSet,
+    /// The commands for each matching glob.
+    commands: Vec<DecompressionCommand>,
+}
+
+impl Default for DecompressionMatcher {
+    fn default() -> DecompressionMatcher {
+        DecompressionMatcher::new()
+    }
+}
+
+impl DecompressionMatcher {
+    /// Create a new matcher with default rules.
+    ///
+    /// To add more matching rules, build a matcher with
+    /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
+    pub fn new() -> DecompressionMatcher {
+        DecompressionMatcherBuilder::new()
+            .build()
+            .expect("built-in matching rules should always compile")
+    }
+
+    /// Return a pre-built command based on the given file path that can
+    /// decompress its contents. If no such decompressor is known, then this
+    /// returns `None`.
+    ///
+    /// If there are multiple possible commands matching the given path, then
+    /// the command added last takes precedence.
+    pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
+        for i in self.globs.matches(path).into_iter().rev() {
+            let decomp_cmd = &self.commands[i];
+            let mut cmd = Command::new(&decomp_cmd.bin);
+            cmd.args(&decomp_cmd.args);
+            return Some(cmd);
+        }
+        None
+    }
+
+    /// Returns true if and only if the given file path has at least one
+    /// matching command to perform decompression on.
+    pub fn has_command<P: AsRef<Path>>(&self, path: P) -> bool {
+        self.globs.is_match(path)
+    }
+}
+
+/// Configures and builds a streaming reader for decompressing data.
+#[derive(Clone, Debug, Default)]
+pub struct DecompressionReaderBuilder {
+    matcher: DecompressionMatcher,
+    command_builder: CommandReaderBuilder,
+}
+
+impl DecompressionReaderBuilder {
+    /// Create a new builder with the default configuration.
+    pub fn new() -> DecompressionReaderBuilder {
+        DecompressionReaderBuilder::default()
+    }
+
+    /// Build a new streaming reader for decompressing data.
+    ///
+    /// If decompression is done out-of-process and if there was a problem
+    /// spawning the process, then its error is logged at the debug level and a
+    /// passthru reader is returned that does no decompression. This behavior
+    /// typically occurs when the given file path matches a decompression
+    /// command, but is executing in an environment where the decompression
+    /// command is not available.
+    ///
+    /// If the given file path could not be matched with a decompression
+    /// strategy, then a passthru reader is returned that does no
+    /// decompression.
+    pub fn build<P: AsRef<Path>>(
+        &self,
+        path: P,
+    ) -> Result<DecompressionReader, CommandError> {
+        let path = path.as_ref();
+        let mut cmd = match self.matcher.command(path) {
+            None => return DecompressionReader::new_passthru(path),
+            Some(cmd) => cmd,
+        };
+        cmd.arg(path);
+
+        match self.command_builder.build(&mut cmd) {
+            Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
+            Err(err) => {
+                debug!(
+                    "{}: error spawning command '{:?}': {} \
+                     (falling back to uncompressed reader)",
+                    path.display(),
+                    cmd,
+                    err,
+                );
+                DecompressionReader::new_passthru(path)
+            }
+        }
+    }
+
+    /// Set the matcher to use to look up the decompression command for each
+    /// file path.
+    ///
+    /// A set of sensible rules is enabled by default. Setting this will
+    /// completely replace the current rules.
+    pub fn matcher(
+        &mut self,
+        matcher: DecompressionMatcher,
+    ) -> &mut DecompressionReaderBuilder {
+        self.matcher = matcher;
+        self
+    }
+
+    /// Get the underlying matcher currently used by this builder.
+    pub fn get_matcher(&self) -> &DecompressionMatcher {
+        &self.matcher
+    }
+
+    /// When enabled, the reader will asynchronously read the contents of the
+    /// command's stderr output. When disabled, stderr is only read after the
+    /// stdout stream has been exhausted (or if the process quits with an error
+    /// code).
+    ///
+    /// Note that when enabled, this may require launching an additional
+    /// thread in order to read stderr. This is done so that the process being
+    /// executed is never blocked from writing to stdout or stderr. If this is
+    /// disabled, then it is possible for the process to fill up the stderr
+    /// buffer and deadlock.
+    ///
+    /// This is enabled by default.
+    pub fn async_stderr(
+        &mut self,
+        yes: bool,
+    ) -> &mut DecompressionReaderBuilder {
+        self.command_builder.async_stderr(yes);
+        self
+    }
+}
+
+/// A streaming reader for decompressing the contents of a file.
+///
+/// The purpose of this reader is to provide a seamless way to decompress the
+/// contents of file using existing tools in the current environment. This is
+/// meant to be an alternative to using decompression libraries in favor of the
+/// simplicity and portability of using external commands such as `gzip` and
+/// `xz`. This does impose the overhead of spawning a process, so other means
+/// for performing decompression should be sought if this overhead isn't
+/// acceptable.
+///
+/// A decompression reader comes with a default set of matching rules that are
+/// meant to associate file paths with the corresponding command to use to
+/// decompress them. For example, a glob like `*.gz` matches gzip compressed
+/// files with the command `gzip -d -c`. If a file path does not match any
+/// existing rules, or if it matches a rule whose command does not exist in the
+/// current environment, then the decompression reader passes through the
+/// contents of the underlying file without doing any decompression.
+///
+/// The default matching rules are probably good enough for most cases, and if
+/// they require revision, pull requests are welcome. In cases where they must
+/// be changed or extended, they can be customized through the use of
+/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html)
+/// and
+/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html).
+///
+/// By default, this reader will asynchronously read the processes' stderr.
+/// This prevents subtle deadlocking bugs for noisy processes that write a lot
+/// to stderr. Currently, the entire contents of stderr is read on to the heap.
+///
+/// # Example
+///
+/// This example shows how to read the decompressed contents of a file without
+/// needing to explicitly choose the decompression command to run.
+///
+/// Note that if you need to decompress multiple files, it is better to use
+/// `DecompressionReaderBuilder`, which will amortize the cost of compiling the
+/// matcher.
+///
+/// ```no_run
+/// use std::io::Read;
+/// use std::process::Command;
+/// use grep_cli::DecompressionReader;
+///
+/// # fn example() -> Result<(), Box<::std::error::Error>> {
+/// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?;
+/// let mut contents = vec![];
+/// rdr.read_to_end(&mut contents)?;
+/// # Ok(()) }
+/// ```
+#[derive(Debug)]
+pub struct DecompressionReader {
+    rdr: Result<CommandReader, File>,
+}
+
+impl DecompressionReader {
+    /// Build a new streaming reader for decompressing data.
+    ///
+    /// If decompression is done out-of-process and if there was a problem
+    /// spawning the process, then its error is returned.
+    ///
+    /// If the given file path could not be matched with a decompression
+    /// strategy, then a passthru reader is returned that does no
+    /// decompression.
+    ///
+    /// This uses the default matching rules for determining how to decompress
+    /// the given file. To change those matching rules, use
+    /// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html)
+    /// and
+    /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
+    ///
+    /// When creating readers for many paths. it is better to use the builder
+    /// since it will amortize the cost of constructing the matcher.
+    pub fn new<P: AsRef<Path>>(
+        path: P,
+    ) -> Result<DecompressionReader, CommandError> {
+        DecompressionReaderBuilder::new().build(path)
+    }
+
+    /// Creates a new "passthru" decompression reader that reads from the file
+    /// corresponding to the given path without doing decompression and without
+    /// executing another process.
+    fn new_passthru(path: &Path) -> Result<DecompressionReader, CommandError> {
+        let file = File::open(path)?;
+        Ok(DecompressionReader { rdr: Err(file) })
+    }
+}
+
+impl io::Read for DecompressionReader {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match self.rdr {
+            Ok(ref mut rdr) => rdr.read(buf),
+            Err(ref mut rdr) => rdr.read(buf),
+        }
+    }
+}
+
+fn default_decompression_commands() -> Vec<DecompressionCommand> {
+    const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"];
+    const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"];
+    const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
+    const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
+    const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
+
+    fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
+        DecompressionCommand {
+            glob: glob.to_string(),
+            bin: OsStr::new(&args[0]).to_os_string(),
+            args: args
+                .iter()
+                .skip(1)
+                .map(|s| OsStr::new(s).to_os_string())
+                .collect(),
+        }
+    }
+    vec![
+        cmd("*.gz", ARGS_GZIP),
+        cmd("*.tgz", ARGS_GZIP),
+
+        cmd("*.bz2", ARGS_BZIP),
+        cmd("*.tbz2", ARGS_BZIP),
+
+        cmd("*.xz", ARGS_XZ),
+        cmd("*.txz", ARGS_XZ),
+
+        cmd("*.lz4", ARGS_LZ4),
+
+        cmd("*.lzma", ARGS_LZMA),
+    ]
+}
diff --git a/grep-cli/src/escape.rs b/grep-cli/src/escape.rs
new file mode 100644
index 00000000..9b350a93
--- /dev/null
+++ b/grep-cli/src/escape.rs
@@ -0,0 +1,315 @@
+use std::ffi::OsStr;
+use std::str;
+
+/// A single state in the state machine used by `unescape`.
+#[derive(Clone, Copy, Eq, PartialEq)]
+enum State {
+    /// The state after seeing a `\`.
+    Escape,
+    /// The state after seeing a `\x`.
+    HexFirst,
+    /// The state after seeing a `\x[0-9A-Fa-f]`.
+    HexSecond(char),
+    /// Default state.
+    Literal,
+}
+
+/// Escapes arbitrary bytes into a human readable string.
+///
+/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
+/// converts the non-printable subset of ASCII in addition to invalid UTF-8
+/// bytes to hexadecimal escape sequences. Everything else is left as is.
+///
+/// The dual of this routine is [`unescape`](fn.unescape.html).
+///
+/// # Example
+///
+/// This example shows how to convert a byte string that contains a `\n` and
+/// invalid UTF-8 bytes into a `String`.
+///
+/// Pay special attention to the use of raw strings. That is, `r"\n"` is
+/// equivalent to `"\\n"`.
+///
+/// ```
+/// use grep_cli::escape;
+///
+/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
+/// ```
+pub fn escape(mut bytes: &[u8]) -> String {
+    let mut escaped = String::new();
+    while let Some(result) = decode_utf8(bytes) {
+        match result {
+            Ok(cp) => {
+                escape_char(cp, &mut escaped);
+                bytes = &bytes[cp.len_utf8()..];
+            }
+            Err(byte) => {
+                escape_byte(byte, &mut escaped);
+                bytes = &bytes[1..];
+            }
+        }
+    }
+    escaped
+}
+
+/// Escapes an OS string into a human readable string.
+///
+/// This is like [`escape`](fn.escape.html), but accepts an OS string.
+pub fn escape_os(string: &OsStr) -> String {
+    #[cfg(unix)]
+    fn imp(string: &OsStr) -> String {
+        use std::os::unix::ffi::OsStrExt;
+
+        escape(string.as_bytes())
+    }
+
+    #[cfg(not(unix))]
+    fn imp(string: &OsStr) -> String {
+        escape(string.to_string_lossy().as_bytes())
+    }
+
+    imp(string)
+}
+
+/// Unescapes a string.
+///
+/// It supports a limited set of escape sequences:
+///
+/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
+/// * `\xZZ` hexadecimal escapes are mapped to their byte.
+///
+/// Everything else is left as is, including non-hexadecimal escapes like
+/// `\xGG`.
+///
+/// This is useful when it is desirable for a command line argument to be
+/// capable of specifying arbitrary bytes or otherwise make it easier to
+/// specify non-printable characters.
+///
+/// The dual of this routine is [`escape`](fn.escape.html).
+///
+/// # Example
+///
+/// This example shows how to convert an escaped string (which is valid UTF-8)
+/// into a corresponding sequence of bytes. Each escape sequence is mapped to
+/// its bytes, which may include invalid UTF-8.
+///
+/// Pay special attention to the use of raw strings. That is, `r"\n"` is
+/// equivalent to `"\\n"`.
+///
+/// ```
+/// use grep_cli::unescape;
+///
+/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
+/// ```
+pub fn unescape(s: &str) -> Vec<u8> {
+    use self::State::*;
+
+    let mut bytes = vec![];
+    let mut state = Literal;
+    for c in s.chars() {
+        match state {
+            Escape => {
+                match c {
+                    '\\' => { bytes.push(b'\\'); state = Literal; }
+                    'n' => { bytes.push(b'\n'); state = Literal; }
+                    'r' => { bytes.push(b'\r'); state = Literal; }
+                    't' => { bytes.push(b'\t'); state = Literal; }
+                    'x' => { state = HexFirst; }
+                    c => {
+                        bytes.extend(format!(r"\{}", c).into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            HexFirst => {
+                match c {
+                    '0'...'9' | 'A'...'F' | 'a'...'f' => {
+                        state = HexSecond(c);
+                    }
+                    c => {
+                        bytes.extend(format!(r"\x{}", c).into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            HexSecond(first) => {
+                match c {
+                    '0'...'9' | 'A'...'F' | 'a'...'f' => {
+                        let ordinal = format!("{}{}", first, c);
+                        let byte = u8::from_str_radix(&ordinal, 16).unwrap();
+                        bytes.push(byte);
+                        state = Literal;
+                    }
+                    c => {
+                        let original = format!(r"\x{}{}", first, c);
+                        bytes.extend(original.into_bytes());
+                        state = Literal;
+                    }
+                }
+            }
+            Literal => {
+                match c {
+                    '\\' => { state = Escape; }
+                    c => { bytes.extend(c.to_string().as_bytes()); }
+                }
+            }
+        }
+    }
+    match state {
+        Escape => bytes.push(b'\\'),
+        HexFirst => bytes.extend(b"\\x"),
+        HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
+        Literal => {}
+    }
+    bytes
+}
+
+/// Unescapes an OS string.
+///
+/// This is like [`unescape`](fn.unescape.html), but
author	Andrew Gallant <jamslam@gmail.com>	2018-08-29 20:53:52 -0400
committer	Andrew Gallant <jamslam@gmail.com>	2018-09-04 23:18:55 -0400
commit	4846d63539690047fa58ec582d94bcba16da1c09 (patch)
tree	61a2cf9de3d62ea6524659893ab9a2c7800c3286
parent	13c47530a6e685d2dee1953a64f055936e6a2ba8 (diff)