-rw-r--r--   CHANGELOG.md                  |    2
-rw-r--r--   Cargo.lock                    |   19
-rw-r--r--   Cargo.toml                    |    7
-rw-r--r--   grep-cli/Cargo.toml           |   25
-rw-r--r--   grep-cli/LICENSE-MIT          |   21
-rw-r--r--   grep-cli/README.md            |   38
-rw-r--r--   grep-cli/UNLICENSE            |   24
-rw-r--r--   grep-cli/src/decompress.rs    |  381
-rw-r--r--   grep-cli/src/escape.rs        |  315
-rw-r--r--   grep-cli/src/human.rs         |  171
-rw-r--r--   grep-cli/src/lib.rs           |  251
-rw-r--r--   grep-cli/src/pattern.rs       |  205
-rw-r--r--   grep-cli/src/process.rs       |  267
-rw-r--r--   grep-cli/src/wtr.rs           |  133
-rw-r--r--   grep-printer/src/color.rs     |   28
-rw-r--r--   grep-printer/src/lib.rs       |    2
-rw-r--r--   grep/Cargo.toml               |    1
-rw-r--r--   grep/examples/simplegrep.rs   |   34
-rw-r--r--   grep/src/lib.rs               |    1
-rw-r--r--   src/app.rs                    |    8
-rw-r--r--   src/args.rs                   |  149
-rw-r--r--   src/decompressor.rs           |  190
-rw-r--r--   src/main.rs                   |   10
-rw-r--r--   src/preprocessor.rs           |   93
-rw-r--r--   src/search.rs                 |   73
-rw-r--r--   src/unescape.rs               |  137
26 files changed, 1987 insertions, 598 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c0fb04b9..73406762 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -71,6 +71,8 @@ Bug fixes:
Context from the `--passthru` flag should not impact process exit status.
* [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984):
Fixes bug in `ignore` crate where first path was always treated as a symlink.
+* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990):
+ Read stderr asynchronously when running a process.
* [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013):
Add compile time and runtime CPU features to `--version` output.
* [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028):
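
The #990 entry above concerns reading a spawned process's stderr asynchronously; in this commit that logic lives in the new `grep-cli` crate (see `grep-cli/src/process.rs` in the diff stat). Below is a rough sketch of how a caller might drive it, with the builder methods inferred from their use in `decompress.rs` later in this diff; `CommandReaderBuilder::new` is an assumption, since `process.rs` itself is not shown in this excerpt.

```rust
// Hedged sketch: streaming a command's stdout while its stderr is drained
// asynchronously, mirroring how decompress.rs uses CommandReaderBuilder below.
// The exact API lives in grep-cli/src/process.rs (not shown here), so treat
// the constructor name as an assumption inferred from this diff.
extern crate grep_cli;

use std::io::Read;
use std::process::Command;

use grep_cli::CommandReaderBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The child command whose output we want to stream.
    let mut cmd = Command::new("gzip");
    cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz");

    // async_stderr(true) is the documented default; it reads stderr on a
    // separate thread so a chatty child cannot deadlock on a full stderr
    // pipe (the situation described by #990).
    let mut builder = CommandReaderBuilder::new();
    builder.async_stderr(true);

    let mut rdr = builder.build(&mut cmd)?;
    let mut stdout = vec![];
    rdr.read_to_end(&mut stdout)?;
    Ok(())
}
```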
diff --git a/Cargo.lock b/Cargo.lock
index 7ddb3f2b..b1ee1723 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -168,6 +168,7 @@ name = "grep"
version = "0.2.0"
dependencies = [
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "grep-cli 0.1.0",
"grep-matcher 0.1.0",
"grep-pcre2 0.1.0",
"grep-printer 0.1.0",
@@ -178,6 +179,20 @@ dependencies = [
]
[[package]]
+name = "grep-cli"
+version = "0.1.0"
+dependencies = [
+ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "globset 0.4.1",
+ "lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "log 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "regex 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "grep-matcher"
version = "0.1.0"
dependencies = [
@@ -464,21 +479,17 @@ dependencies = [
name = "ripgrep"
version = "0.9.0"
dependencies = [
- "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
- "globset 0.4.1",
"grep 0.2.0",
"ignore 0.4.3",
"lazy_static 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "same-file 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.75 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)",
"termcolor 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "winapi-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 3ff769c6..0c489c46 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,6 +35,7 @@ path = "tests/tests.rs"
members = [
"globset",
"grep",
+ "grep-cli",
"grep-matcher",
"grep-pcre2",
"grep-printer",
@@ -44,15 +45,12 @@ members = [
]
[dependencies]
-atty = "0.2.11"
-globset = { version = "0.4.0", path = "globset" }
grep = { version = "0.2.0", path = "grep" }
ignore = { version = "0.4.0", path = "ignore" }
lazy_static = "1"
log = "0.4"
num_cpus = "1"
regex = "1"
-same-file = "1"
serde_json = "1"
termcolor = "1"
@@ -61,9 +59,6 @@ version = "2.32.0"
default-features = false
features = ["suggestions"]
-[target.'cfg(windows)'.dependencies.winapi-util]
-version = "0.1.1"
-
[build-dependencies]
lazy_static = "1"
diff --git a/grep-cli/Cargo.toml b/grep-cli/Cargo.toml
new file mode 100644
index 00000000..1d5fda22
--- /dev/null
+++ b/grep-cli/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "grep-cli"
+version = "0.1.0" #:version
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = """
+Utilities for search oriented command line applications.
+"""
+documentation = "https://docs.rs/grep-cli"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
+readme = "README.md"
+keywords = ["regex", "grep", "cli", "utility", "util"]
+license = "Unlicense/MIT"
+
+[dependencies]
+atty = "0.2.11"
+globset = { version = "0.4.1", path = "../globset" }
+lazy_static = "1.1"
+log = "0.4"
+regex = "1"
+same-file = "1"
+termcolor = "1"
+
+[target.'cfg(windows)'.dependencies.winapi-util]
+version = "0.1.1"
diff --git a/grep-cli/LICENSE-MIT b/grep-cli/LICENSE-MIT
new file mode 100644
index 00000000..3b0a5dc0
--- /dev/null
+++ b/grep-cli/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/grep-cli/README.md b/grep-cli/README.md
new file mode 100644
index 00000000..e78317c6
--- /dev/null
+++ b/grep-cli/README.md
@@ -0,0 +1,38 @@
+grep-cli
+--------
+A utility library that provides common routines desired in search-oriented
+command line applications. This includes, but is not limited to, parsing hex
+escapes and detecting whether stdin is readable. To the extent possible,
+this crate strives for compatibility across Windows, macOS and Linux.
+
+[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
+[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
+[![](https://img.shields.io/crates/v/grep-cli.svg)](https://crates.io/crates/grep-cli)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+
+### Documentation
+
+[https://docs.rs/grep-cli](https://docs.rs/grep-cli)
+
+**NOTE:** You probably don't want to use this crate directly. Instead, you
+should prefer the facade defined in the
+[`grep`](https://docs.rs/grep)
+crate.
+
+
+### Usage
+
+Add this to your `Cargo.toml`:
+
+```toml
+[dependencies]
+grep-cli = "0.1"
+```
+
+and this to your crate root:
+
+```rust
+extern crate grep_cli;
+```
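
As a concrete follow-on to the usage snippet above, here is a minimal sketch exercising the `escape`/`unescape` helpers that this commit adds in `grep-cli/src/escape.rs`; the assertions mirror the doctests that appear further down in this diff.

```rust
// A small sketch of the escape/unescape helpers added in src/escape.rs below.
// The assertions are taken directly from the crate's own doctests.
extern crate grep_cli;

use grep_cli::{escape, unescape};

fn main() {
    // Invalid UTF-8 and control bytes become backslash/hex escape sequences...
    assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
    // ...and unescape maps the escaped form back to the original bytes.
    assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
}
```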
diff --git a/grep-cli/UNLICENSE b/grep-cli/UNLICENSE
new file mode 100644
index 00000000..68a49daa
--- /dev/null
+++ b/grep-cli/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
diff --git a/grep-cli/src/decompress.rs b/grep-cli/src/decompress.rs
new file mode 100644
index 00000000..ad108ea0
--- /dev/null
+++ b/grep-cli/src/decompress.rs
@@ -0,0 +1,381 @@
+use std::ffi::{OsStr, OsString};
+use std::fs::File;
+use std::io;
+use std::path::Path;
+use std::process::Command;
+
+use globset::{Glob, GlobSet, GlobSetBuilder};
+
+use process::{CommandError, CommandReader, CommandReaderBuilder};
+
+/// A builder for a matcher that determines which files get decompressed.
+#[derive(Clone, Debug)]
+pub struct DecompressionMatcherBuilder {
+ /// The commands for each matching glob.
+ commands: Vec<DecompressionCommand>,
+ /// Whether to include the default matching rules.
+ defaults: bool,
+}
+
+/// A representation of a single command for decompressing data
+/// out-of-process.
+#[derive(Clone, Debug)]
+struct DecompressionCommand {
+ /// The glob that matches this command.
+ glob: String,
+ /// The command or binary name.
+ bin: OsString,
+ /// The arguments to invoke with the command.
+ args: Vec<OsString>,
+}
+
+impl Default for DecompressionMatcherBuilder {
+ fn default() -> DecompressionMatcherBuilder {
+ DecompressionMatcherBuilder::new()
+ }
+}
+
+impl DecompressionMatcherBuilder {
+ /// Create a new builder for configuring a decompression matcher.
+ pub fn new() -> DecompressionMatcherBuilder {
+ DecompressionMatcherBuilder {
+ commands: vec![],
+ defaults: true,
+ }
+ }
+
+ /// Build a matcher for determining how to decompress files.
+ ///
+ /// If there was a problem compiling the matcher, then an error is
+ /// returned.
+ pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
+ let defaults =
+ if !self.defaults {
+ vec![]
+ } else {
+ default_decompression_commands()
+ };
+ let mut glob_builder = GlobSetBuilder::new();
+ let mut commands = vec![];
+ for decomp_cmd in defaults.iter().chain(&self.commands) {
+ let glob = Glob::new(&decomp_cmd.glob).map_err(|err| {
+ CommandError::io(io::Error::new(io::ErrorKind::Other, err))
+ })?;
+ glob_builder.add(glob);
+ commands.push(decomp_cmd.clone());
+ }
+ let globs = glob_builder.build().map_err(|err| {
+ CommandError::io(io::Error::new(io::ErrorKind::Other, err))
+ })?;
+ Ok(DecompressionMatcher { globs, commands })
+ }
+
+ /// When enabled, the default matching rules will be compiled into this
+ /// matcher before any other associations. When disabled, only the
+ /// rules explicitly given to this builder will be used.
+ ///
+ /// This is enabled by default.
+ pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder {
+ self.defaults = yes;
+ self
+ }
+
+ /// Associates a glob with a command to decompress files matching the glob.
+ ///
+ /// If multiple globs match the same file, then the most recently added
+ /// glob takes precedence.
+ ///
+ /// The syntax for the glob is documented in the
+ /// [`globset` crate](https://docs.rs/globset/#syntax).
+ pub fn associate<P, I, A>(
+ &mut self,
+ glob: &str,
+ program: P,
+ args: I,
+ ) -> &mut DecompressionMatcherBuilder
+ where P: AsRef<OsStr>,
+ I: IntoIterator<Item=A>,
+ A: AsRef<OsStr>,
+ {
+
+ let glob = glob.to_string();
+ let bin = program.as_ref().to_os_string();
+ let args = args
+ .into_iter()
+ .map(|a| a.as_ref().to_os_string())
+ .collect();
+ self.commands.push(DecompressionCommand { glob, bin, args });
+ self
+ }
+}
+
+/// A matcher for determining how to decompress files.
+#[derive(Clone, Debug)]
+pub struct DecompressionMatcher {
+ /// The set of globs to match. Each glob has a corresponding entry in
+ /// `commands`. When a glob matches, the corresponding command should be
+ /// used to perform out-of-process decompression.
+ globs: GlobSet,
+ /// The commands for each matching glob.
+ commands: Vec<DecompressionCommand>,
+}
+
+impl Default for DecompressionMatcher {
+ fn default() -> DecompressionMatcher {
+ DecompressionMatcher::new()
+ }
+}
+
+impl DecompressionMatcher {
+ /// Create a new matcher with default rules.
+ ///
+ /// To add more matching rules, build a matcher with
+ /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
+ pub fn new() -> DecompressionMatcher {
+ DecompressionMatcherBuilder::new()
+ .build()
+ .expect("built-in matching rules should always compile")
+ }
+
+ /// Return a pre-built command based on the given file path that can
+ /// decompress its contents. If no such decompressor is known, then this
+ /// returns `None`.
+ ///
+ /// If there are multiple possible commands matching the given path, then
+ /// the command added last takes precedence.
+ pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
+ for i in self.globs.matches(path).into_iter().rev() {
+ let decomp_cmd = &self.commands[i];
+ let mut cmd = Command::new(&decomp_cmd.bin);
+ cmd.args(&decomp_cmd.args);
+ return Some(cmd);
+ }
+ None
+ }
+
+ /// Returns true if and only if the given file path has at least one
+ /// matching command to perform decompression on.
+ pub fn has_command<P: AsRef<Path>>(&self, path: P) -> bool {
+ self.globs.is_match(path)
+ }
+}
+
+/// Configures and builds a streaming reader for decompressing data.
+#[derive(Clone, Debug, Default)]
+pub struct DecompressionReaderBuilder {
+ matcher: DecompressionMatcher,
+ command_builder: CommandReaderBuilder,
+}
+
+impl DecompressionReaderBuilder {
+ /// Create a new builder with the default configuration.
+ pub fn new() -> DecompressionReaderBuilder {
+ DecompressionReaderBuilder::default()
+ }
+
+ /// Build a new streaming reader for decompressing data.
+ ///
+ /// If decompression is done out-of-process and if there was a problem
+ /// spawning the process, then its error is logged at the debug level and a
+ /// passthru reader is returned that does no decompression. This behavior
+ /// typically occurs when the given file path matches a decompression
+ /// command, but that command is not available in the current
+ /// environment.
+ ///
+ /// If the given file path could not be matched with a decompression
+ /// strategy, then a passthru reader is returned that does no
+ /// decompression.
+ pub fn build<P: AsRef<Path>>(
+ &self,
+ path: P,
+ ) -> Result<DecompressionReader, CommandError> {
+ let path = path.as_ref();
+ let mut cmd = match self.matcher.command(path) {
+ None => return DecompressionReader::new_passthru(path),
+ Some(cmd) => cmd,
+ };
+ cmd.arg(path);
+
+ match self.command_builder.build(&mut cmd) {
+ Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
+ Err(err) => {
+ debug!(
+ "{}: error spawning command '{:?}': {} \
+ (falling back to uncompressed reader)",
+ path.display(),
+ cmd,
+ err,
+ );
+ DecompressionReader::new_passthru(path)
+ }
+ }
+ }
+
+ /// Set the matcher to use to look up the decompression command for each
+ /// file path.
+ ///
+ /// A set of sensible rules is enabled by default. Setting this will
+ /// completely replace the current rules.
+ pub fn matcher(
+ &mut self,
+ matcher: DecompressionMatcher,
+ ) -> &mut DecompressionReaderBuilder {
+ self.matcher = matcher;
+ self
+ }
+
+ /// Get the underlying matcher currently used by this builder.
+ pub fn get_matcher(&self) -> &DecompressionMatcher {
+ &self.matcher
+ }
+
+ /// When enabled, the reader will asynchronously read the contents of the
+ /// command's stderr output. When disabled, stderr is only read after the
+ /// stdout stream has been exhausted (or if the process quits with an error
+ /// code).
+ ///
+ /// Note that when enabled, this may require launching an additional
+ /// thread in order to read stderr. This is done so that the process being
+ /// executed is never blocked from writing to stdout or stderr. If this is
+ /// disabled, then it is possible for the process to fill up the stderr
+ /// buffer and deadlock.
+ ///
+ /// This is enabled by default.
+ pub fn async_stderr(
+ &mut self,
+ yes: bool,
+ ) -> &mut DecompressionReaderBuilder {
+ self.command_builder.async_stderr(yes);
+ self
+ }
+}
+
+/// A streaming reader for decompressing the contents of a file.
+///
+/// The purpose of this reader is to provide a seamless way to decompress the
+/// contents of a file using existing tools in the current environment. This is
+/// meant to be an alternative to using decompression libraries in favor of the
+/// simplicity and portability of using external commands such as `gzip` and
+/// `xz`. This does impose the overhead of spawning a process, so other means
+/// for performing decompression should be sought if this overhead isn't
+/// acceptable.
+///
+/// A decompression reader comes with a default set of matching rules that are
+/// meant to associate file paths with the corresponding command to use to
+/// decompress them. For example, a glob like `*.gz` matches gzip compressed
+/// files with the command `gzip -d -c`. If a file path does not match any
+/// existing rules, or if it matches a rule whose command does not exist in the
+/// current environment, then the decompression reader passes through the
+/// contents of the underlying file without doing any decompression.
+///
+/// The default matching rules are probably good enough for most cases, and if
+/// they require revision, pull requests are welcome. In cases where they must
+/// be changed or extended, they can be customized through the use of
+/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html)
+/// and
+/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html).
+///
+/// By default, this reader will asynchronously read the process's stderr.
+/// This prevents subtle deadlocking bugs for noisy processes that write a lot
+/// to stderr. Currently, the entire contents of stderr are read onto the heap.
+///
+/// # Example
+///
+/// This example shows how to read the decompressed contents of a file without
+/// needing to explicitly choose the decompression command to run.
+///
+/// Note that if you need to decompress multiple files, it is better to use
+/// `DecompressionReaderBuilder`, which will amortize the cost of compiling the
+/// matcher.
+///
+/// ```no_run
+/// use std::io::Read;
+/// use std::process::Command;
+/// use grep_cli::DecompressionReader;
+///
+/// # fn example() -> Result<(), Box<::std::error::Error>> {
+/// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?;
+/// let mut contents = vec![];
+/// rdr.read_to_end(&mut contents)?;
+/// # Ok(()) }
+/// ```
+#[derive(Debug)]
+pub struct DecompressionReader {
+ rdr: Result<CommandReader, File>,
+}
+
+impl DecompressionReader {
+ /// Build a new streaming reader for decompressing data.
+ ///
+ /// If decompression is done out-of-process and if there was a problem
+ /// spawning the process, then its error is returned.
+ ///
+ /// If the given file path could not be matched with a decompression
+ /// strategy, then a passthru reader is returned that does no
+ /// decompression.
+ ///
+ /// This uses the default matching rules for determining how to decompress
+ /// the given file. To change those matching rules, use
+ /// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html)
+ /// and
+ /// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
+ ///
+ /// When creating readers for many paths, it is better to use the builder
+ /// since it will amortize the cost of constructing the matcher.
+ pub fn new<P: AsRef<Path>>(
+ path: P,
+ ) -> Result<DecompressionReader, CommandError> {
+ DecompressionReaderBuilder::new().build(path)
+ }
+
+ /// Creates a new "passthru" decompression reader that reads from the file
+ /// corresponding to the given path without doing decompression and without
+ /// executing another process.
+ fn new_passthru(path: &Path) -> Result<DecompressionReader, CommandError> {
+ let file = File::open(path)?;
+ Ok(DecompressionReader { rdr: Err(file) })
+ }
+}
+
+impl io::Read for DecompressionReader {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ match self.rdr {
+ Ok(ref mut rdr) => rdr.read(buf),
+ Err(ref mut rdr) => rdr.read(buf),
+ }
+ }
+}
+
+fn default_decompression_commands() -> Vec<DecompressionCommand> {
+ const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"];
+ const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"];
+ const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
+ const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
+ const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
+
+ fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
+ DecompressionCommand {
+ glob: glob.to_string(),
+ bin: OsStr::new(&args[0]).to_os_string(),
+ args: args
+ .iter()
+ .skip(1)
+ .map(|s| OsStr::new(s).to_os_string())
+ .collect(),
+ }
+ }
+ vec![
+ cmd("*.gz", ARGS_GZIP),
+ cmd("*.tgz", ARGS_GZIP),
+
+ cmd("*.bz2", ARGS_BZIP),
+ cmd("*.tbz2", ARGS_BZIP),
+
+ cmd("*.xz", ARGS_XZ),
+ cmd("*.txz", ARGS_XZ),
+
+ cmd("*.lz4", ARGS_LZ4),
+
+ cmd("*.lzma", ARGS_LZMA),
+ ]
+}
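
A short sketch of customizing the matcher built above: it wires `DecompressionMatcherBuilder::associate` into `DecompressionReaderBuilder::matcher`, both shown in `decompress.rs`. The `*.zst`/`zstd` association and the `archive.zst` path are hypothetical, chosen only for illustration.

```rust
// Hedged sketch: extending the default decompression rules with a custom
// association, then reading through a DecompressionReaderBuilder.
extern crate grep_cli;

use std::io::Read;

use grep_cli::{DecompressionMatcherBuilder, DecompressionReaderBuilder};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut builder = DecompressionMatcherBuilder::new();
    // Later associations win when multiple globs match the same path.
    // The "*.zst" -> zstd mapping is a hypothetical example, not a default rule.
    builder.associate("*.zst", "zstd", &["-d", "-c"]);
    let matcher = builder.build()?;

    // "archive.zst" is an illustrative path; a real program would use its input.
    let mut rdr = DecompressionReaderBuilder::new()
        .matcher(matcher)
        .build("archive.zst")?;
    let mut contents = vec![];
    rdr.read_to_end(&mut contents)?;
    Ok(())
}
```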
diff --git a/grep-cli/src/escape.rs b/grep-cli/src/escape.rs
new file mode 100644
index 00000000..9b350a93
--- /dev/null
+++ b/grep-cli/src/escape.rs
@@ -0,0 +1,315 @@
+use std::ffi::OsStr;
+use std::str;
+
+/// A single state in the state machine used by `unescape`.
+#[derive(Clone, Copy, Eq, PartialEq)]
+enum State {
+ /// The state after seeing a `\`.
+ Escape,
+ /// The state after seeing a `\x`.
+ HexFirst,
+ /// The state after seeing a `\x[0-9A-Fa-f]`.
+ HexSecond(char),
+ /// Default state.
+ Literal,
+}
+
+/// Escapes arbitrary bytes into a human readable string.
+///
+/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
+/// converts the non-printable subset of ASCII in addition to invalid UTF-8
+/// bytes to hexadecimal escape sequences. Everything else is left as is.
+///
+/// The dual of this routine is [`unescape`](fn.unescape.html).
+///
+/// # Example
+///
+/// This example shows how to convert a byte string that contains a `\n` and
+/// invalid UTF-8 bytes into a `String`.
+///
+/// Pay special attention to the use of raw strings. That is, `r"\n"` is
+/// equivalent to `"\\n"`.
+///
+/// ```
+/// use grep_cli::escape;
+///
+/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
+/// ```
+pub fn escape(mut bytes: &[u8]) -> String {
+ let mut escaped = String::new();
+ while let Some(result) = decode_utf8(bytes) {
+ match result {
+ Ok(cp) => {
+ escape_char(cp, &mut escaped);
+ bytes = &bytes[cp.len_utf8()..];
+ }
+ Err(byte) => {
+ escape_byte(byte, &mut escaped);
+ bytes = &bytes[1..];
+ }
+ }
+ }
+ escaped
+}
+
+/// Escapes an OS string into a human readable string.
+///
+/// This is like [`escape`](fn.escape.html), but accepts an OS string.
+pub fn escape_os(string: &OsStr) -> String {
+ #[cfg(unix)]
+ fn imp(string: &OsStr) -> String {
+ use std::os::unix::ffi::OsStrExt;
+
+ escape(string.as_bytes())
+ }
+
+ #[cfg(not(unix))]
+ fn imp(string: &OsStr) -> String {
+ escape(string.to_string_lossy().as_bytes())
+ }
+
+ imp(string)
+}
+
+/// Unescapes a string.
+///
+/// It supports a limited set of escape sequences:
+///
+/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
+/// * `\xZZ` hexadecimal escapes are mapped to their byte.
+///
+/// Everything else is left as is, including non-hexadecimal escapes like
+/// `\xGG`.
+///
+/// This is useful when it is desirable for a command line argument to be
+/// capable of specifying arbitrary bytes or otherwise make it easier to
+/// specify non-printable characters.
+///
+/// The dual of this routine is [`escape`](fn.escape.html).
+///
+/// # Example
+///
+/// This example shows how to convert an escaped string (which is valid UTF-8)
+/// into a corresponding sequence of bytes. Each escape sequence is mapped to
+/// its bytes, which may include invalid UTF-8.
+///
+/// Pay special attention to the use of raw strings. That is, `r"\n"` is
+/// equivalent to `"\\n"`.
+///
+/// ```
+/// use grep_cli::unescape;
+///
+/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
+/// ```
+pub fn unescape(s: &str) -> Vec<u8> {
+ use self::State::*;
+
+ let mut bytes = vec![];
+ let mut state = Literal;
+ for c in s.chars() {
+ match state {
+ Escape => {
+ match c {
+ '\\' => { bytes.push(b'\\'); state = Literal; }
+ 'n' => { bytes.push(b'\n'); state = Literal; }
+ 'r' => { bytes.push(b'\r'); state = Literal; }
+ 't' => { bytes.push(b'\t'); state = Literal; }
+ 'x' => { state = HexFirst; }
+ c => {
+ bytes.extend(format!(r"\{}", c).into_bytes());
+ state = Literal;
+ }
+ }
+ }
+ HexFirst => {
+ match c {
+ '0'...'9' | 'A'...'F' | 'a'...'f' => {
+ state = HexSecond(c);
+ }
+ c => {
+ bytes.extend(format!(r"\x{}", c).into_bytes());
+ state = Literal;
+ }
+ }
+ }
+ HexSecond(first) => {
+ match c {
+ '0'...'9' | 'A'...'F' | 'a'...'f' => {
+ let ordinal = format!("{}{}", first, c);
+ let byte = u8::from_str_radix(&ordinal, 16).unwrap();
+ bytes.push(byte);
+ state = Literal;
+ }
+ c => {
+ let original = format!(r"\x{}{}", first, c);
+ bytes.extend(original.into_bytes());
+ state = Literal;
+ }
+ }
+ }
+ Literal => {
+ match c {
+ '\\' => { state = Escape; }
+ c => { bytes.extend(c.to_string().as_bytes()); }
+ }
+ }
+ }
+ }
+ match state {
+ Escape => bytes.push(b'\\'),
+ HexFirst => bytes.extend(b"\\x"),
+ HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
+ Literal => {}
+ }
+ bytes
+}
+
+/// Unescapes an OS string.
+///
+/// This is like [`unescape`](fn.unescape.html), but accepts an OS string.
+///
+/// Note that this first lossily decodes the given OS string as UTF-8. That
+/// is, an escaped string (the thing given) should be valid UTF-8.
+pub fn unescape_os(string: &OsStr) -> Vec<u8> {
+ unescape(&string.to_string_lossy())
+}
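
A small sketch of the `OsStr` variants defined just above; they defer to the byte-oriented `escape`/`unescape` routines, so the assertions below follow from the earlier doctests (this assumes the functions are re-exported at the crate root like `escape`/`unescape`, and valid-UTF-8 input).

```rust
// Sketch: the OS-string variants behave like escape/unescape for valid UTF-8.
extern crate grep_cli;

use std::ffi::OsStr;

use grep_cli::{escape_os, unescape_os};

fn main() {
    assert_eq!(r"foo\nbar", escape_os(OsStr::new("foo\nbar")));
    assert_eq!(b"foo\nbar".to_vec(), unescape_os(OsStr::new(r"foo\nbar")));
}
```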
+
+/// Adds the given codepoint to the given string, escaping it if necessary.
+fn escape_char(cp: char, into: &mut String) {
+ if cp.is_ascii() {
+ escape_byte(cp as u8, into);
+ } else {
+ into.push(cp);
+ }
+}
+
+/// Adds the given byte to the given string, escaping it if necessary.
+fn escape_byte(byte: u8, into: &mut String) {
+ match byte {
+ 0x21...0x5B | 0x5D...0x7D => into.push(byte as char),
+ b'\n' => into.push_str(r"\n"),
+ b'\r' => into.push_str(r"\r"),
+ b'\t' => into.push_str(r"\t"),
+ b'\\' => into.push_str(r"\\"),
+ _ => into.push_str(&format!(r"\x{:02X}", byte)),
+ }
+}
+
+/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
+///
+/// If no valid encoding of a codepoint exists at the beginning of the given
+/// byte slice, then the first byte is returned instead.
+///
+/// This returns `None` if and only if `bytes` is empty.
+fn decode_utf8(bytes: &[u8]) -> Option<Result<char, u8>> {
+ if bytes.is_empty() {