From 92dc402f7f598c3d0045f94df9988bbe1c0c79cf Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 12 Nov 2016 21:48:11 -0500 Subject: Switch from Docopt to Clap. There were two important reasons for the switch: 1. Performance. Docopt does poorly when the argv becomes large, which is a reasonably common use case for search tools. (e.g., use with xargs) 2. Better failure modes. Clap knows a lot more about how a particular argv might be invalid, and can therefore provide much clearer error messages. While both were important, (1) made it urgent. Note that since Clap requires at least Rust 1.11, this will in turn increase the minimum Rust version supported by ripgrep from Rust 1.9 to Rust 1.11. It is therefore a breaking change, so the soonest release of ripgrep with Clap will have to be 0.3. There is also at least one subtle breaking change in real usage. Previous to this commit, this used to work: rg -e -foo Where this would cause ripgrep to search for the string `-foo`. Clap currently has problems supporting this use case (see: https://github.com/kbknapp/clap-rs/issues/742), but it can be worked around by using this instead: rg -e [-]foo or even rg [-]foo and this still works: rg -- -foo This commit also adds Bash, Fish and PowerShell completion files to the release, fixes a bug that prevented ripgrep from working on file paths containing invalid UTF-8 and shows short descriptions in the output of `-h` but longer descriptions in the output of `--help`. 
Fixes #136, Fixes #189, Fixes #210, Fixes #230 --- .travis.yml | 7 +- Cargo.lock | 78 +++- Cargo.toml | 8 +- build.rs | 23 ++ ci/before_deploy.sh | 1 + doc/rg.1 | 25 +- doc/rg.1.md | 18 +- src/app.rs | 432 +++++++++++++++++++++ src/args.rs | 1038 ++++++++++++++++++++------------------------------ src/main.rs | 23 +- src/search_stream.rs | 3 +- src/unescape.rs | 128 +++++++ tests/tests.rs | 22 +- tests/workdir.rs | 23 +- 14 files changed, 1173 insertions(+), 656 deletions(-) create mode 100644 build.rs create mode 100644 src/app.rs create mode 100644 src/unescape.rs diff --git a/.travis.yml b/.travis.yml index 85112278..cd7bd542 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,13 +30,10 @@ matrix: env: TARGET=x86_64-apple-darwin # Minimum Rust supported channel. - os: linux - rust: 1.9.0 - env: TARGET=x86_64-unknown-linux-musl - - os: linux - rust: 1.9.0 + rust: 1.11.0 env: TARGET=x86_64-unknown-linux-gnu - os: osx - rust: 1.9.0 + rust: 1.11.0 env: TARGET=x86_64-apple-darwin before_install: diff --git a/Cargo.lock b/Cargo.lock index ecca3818..ada226fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,8 +3,8 @@ name = "ripgrep" version = "0.2.9" dependencies = [ "bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.18.0 (registry+https://github.com/rust-lang/crates.io-index)", "ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "grep 0.1.4", "ignore 0.1.5", @@ -16,7 +16,6 @@ dependencies = [ "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 
(registry+https://github.com/rust-lang/crates.io-index)", ] @@ -29,6 +28,16 @@ dependencies = [ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "ansi_term" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "bitflags" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bytecount" version = "0.1.4" @@ -37,6 +46,21 @@ dependencies = [ "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "clap" +version = "2.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", + "term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crossbeam" version = "0.2.10" @@ -52,17 +76,6 @@ dependencies = [ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "docopt" -version = "0.6.86" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "env_logger" version = "0.3.5" @@ -193,11 
+206,6 @@ name = "regex-syntax" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -[[package]] -name = "rustc-serialize" -version = "0.3.19" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "simd" version = "0.1.1" @@ -217,6 +225,16 @@ dependencies = [ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "term_size" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "thread-id" version = "2.0.0" @@ -252,6 +270,16 @@ dependencies = [ "unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "unicode-segmentation" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "unicode-width" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "unreachable" version = "0.1.1" @@ -265,6 +293,11 @@ name = "utf8-ranges" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "vec_map" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "void" version = "1.0.2" @@ -291,10 +324,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [metadata] "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" +"checksum bitflags 0.7.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49e3c21915578e2300b08d3c174a8ac887e0c6421dff86fdc4d741dc29e5d413" +"checksum clap 2.18.0 (registry+https://github.com/rust-lang/crates.io-index)" = "40046b8a004bf3ba43b9078bf4b9b6d1708406a234848f925dbd7160a374c8a8" "checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" "checksum ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "77f98bb69e3fefadcc5ca80a1368a55251f70295168203e01165bcaecb270891" -"checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" "checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551" @@ -307,16 +342,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad" "checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" "checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" -"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = 
"6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b" "checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" "checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a" +"checksum term_size 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f7f5f3f71b0040cecc71af239414c23fd3c73570f5ff54cf50e03cef637f2a0" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a" "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" "checksum thread_local 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50057ca52c629a39aed52d8eb253800cb727875fa6fc7c4b1445f0ac3b50c27c" +"checksum unicode-segmentation 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b905d0fc2a1f0befd86b0e72e31d1787944efef9d38b9358a9e92a69757f7e3b" +"checksum unicode-width 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2d6722facc10989f63ee0e20a83cd4e1714a9ae11529403ac7e0afd069abc39e" "checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum vec_map 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"cac5efe5cb0fa14ec2f84f83c701c562ee63f6dcc680861b21d65c682adfb05f" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" diff --git a/Cargo.toml b/Cargo.toml index c4d787ec..afed8439 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ readme = "README.md" keywords = ["regex", "grep", "egrep", "search", "pattern"] license = "Unlicense/MIT" exclude = ["HomebrewFormula"] +build = "build.rs" [[bin]] bench = false @@ -25,8 +26,8 @@ path = "tests/tests.rs" [dependencies] bytecount = "0.1.4" +clap = "2.18" ctrlc = "2.0" -docopt = "0.6" env_logger = "0.3" grep = { version = "0.1.4", path = "grep" } ignore = { version = "0.1.5", path = "ignore" } @@ -37,13 +38,16 @@ memchr = "0.1" memmap = "0.5" num_cpus = "1" regex = "0.1.77" -rustc-serialize = "=0.3.19" term = "0.4" [target.'cfg(windows)'.dependencies] kernel32-sys = "0.2" winapi = "0.2" +[build-dependencies] +clap = "2.18" +lazy_static = "0.2" + [features] avx-accel = ["bytecount/avx-accel"] simd-accel = ["bytecount/simd-accel", "regex/simd-accel"] diff --git a/build.rs b/build.rs new file mode 100644 index 00000000..b2522708 --- /dev/null +++ b/build.rs @@ -0,0 +1,23 @@ +#[macro_use] +extern crate clap; +#[macro_use] +extern crate lazy_static; + +use std::fs; + +use clap::Shell; + +#[allow(dead_code)] +#[path = "src/app.rs"] +mod app; + +fn main() { + fs::create_dir_all(env!("OUT_DIR")).unwrap(); + + let mut app = app::app_short(); + app.gen_completions("rg", Shell::Bash, env!("OUT_DIR")); + app.gen_completions("rg", Shell::Fish, env!("OUT_DIR")); + // Zsh seems to fail with a panic. 
+ // app.gen_completions("rg", Shell::Zsh, env!("OUT_DIR")); + app.gen_completions("rg", Shell::PowerShell, env!("OUT_DIR")); +} diff --git a/ci/before_deploy.sh b/ci/before_deploy.sh index 3d033d87..fedd5fcf 100644 --- a/ci/before_deploy.sh +++ b/ci/before_deploy.sh @@ -19,6 +19,7 @@ mk_tarball() { cp target/$TARGET/release/rg "$td/$name/" cp {doc/rg.1,README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/" + cp target/$TARGET/release/build/ripgrep-*/out/{_rg.,rg.}* "$td/$name/" pushd $td tar czf "$out_dir/$name.tar.gz" * diff --git a/doc/rg.1 b/doc/rg.1 index d1a59dc9..bc756317 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -7,11 +7,11 @@ rg \- recursively search current directory for lines matching a pattern .SH SYNOPSIS .PP -rg [\f[I]options\f[]] \-e PATTERN ... -[\f[I]<\f[]path\f[I]> ...\f[]] -.PP rg [\f[I]options\f[]] <\f[I]pattern\f[]> [\f[I]<\f[]path\f[I]> ...\f[]] .PP +rg [\f[I]options\f[]] (\-e PATTERN | \-f FILE) ... +[\f[I]<\f[]path\f[I]> ...\f[]] +.PP rg [\f[I]options\f[]] \-\-files [\f[I]<\f[]path\f[I]> ...\f[]] .PP rg [\f[I]options\f[]] \-\-type\-list @@ -163,6 +163,15 @@ Show debug messages. .RS .RE .TP +.B \-f, \-\-file FILE ... +Search for patterns from the given file, with one pattern per line. +When this flag is used or multiple times or in combination with the +\-e/\-\-regexp flag, then all patterns provided are searched. +Empty pattern lines will match all input lines, and the newline is not +counted as part of the pattern. +.RS +.RE +.TP .B \-\-files Print each file that would be searched (but don\[aq]t search). .RS @@ -202,6 +211,16 @@ Search hidden directories and files. .RS .RE .TP +.B \-\-ignore\-file FILE ... +Specify additional ignore files for filtering file paths. +Ignore files should be in the gitignore format and are matched relative +to the current working directory. +These ignore files have lower precedence than all other ignore files. +When specifying multiple ignore files, earlier files have lower +precedence than later files. 
+.RS +.RE +.TP .B \-L, \-\-follow Follow symlinks. .RS diff --git a/doc/rg.1.md b/doc/rg.1.md index bf850a98..a3d37667 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -4,10 +4,10 @@ rg - recursively search current directory for lines matching a pattern # SYNOPSIS -rg [*options*] -e PATTERN ... [*<*path*> ...*] - rg [*options*] <*pattern*> [*<*path*> ...*] +rg [*options*] (-e PATTERN | -f FILE) ... [*<*path*> ...*] + rg [*options*] --files [*<*path*> ...*] rg [*options*] --type-list @@ -107,6 +107,12 @@ Project home page: https://github.com/BurntSushi/ripgrep --debug : Show debug messages. +-f, --file FILE ... +: Search for patterns from the given file, with one pattern per line. When this + flag is used or multiple times or in combination with the -e/--regexp flag, + then all patterns provided are searched. Empty pattern lines will match all + input lines, and the newline is not counted as part of the pattern. + --files : Print each file that would be searched (but don't search). @@ -132,6 +138,14 @@ Project home page: https://github.com/BurntSushi/ripgrep : Search hidden directories and files. (Hidden directories and files are skipped by default.) +--ignore-file FILE ... +: Specify additional ignore files for filtering file paths. + Ignore files should be in the gitignore format and are matched + relative to the current working directory. These ignore files + have lower precedence than all other ignore files. When + specifying multiple ignore files, earlier files have lower + precedence than later files. + -L, --follow : Follow symlinks. diff --git a/src/app.rs b/src/app.rs new file mode 100644 index 00000000..5edaf999 --- /dev/null +++ b/src/app.rs @@ -0,0 +1,432 @@ +use std::collections::HashMap; + +use clap::{App, AppSettings, Arg}; + +const ABOUT: &'static str = " +ripgrep (rg) recursively searches your current directory for a regex pattern. 
+ +Project home page: https://github.com/BurntSushi/ripgrep + +Use -h for short descriptions and --help for more details."; + +const USAGE: &'static str = " + rg [OPTIONS] [ ...] + rg [OPTIONS] [-e PATTERN | -f FILE ]... [ ...] + rg [OPTIONS] --files [ ...] + rg [OPTIONS] --type-list"; + +const TEMPLATE: &'static str = "\ +{bin} {version} +{author} +{about} + +USAGE:{usage} + +ARGS: +{positionals} + +OPTIONS: +{unified}"; + +/// Build a clap application with short help strings. +pub fn app_short() -> App<'static, 'static> { + app(false, |k| USAGES[k].short) +} + +/// Build a clap application with long help strings. +pub fn app_long() -> App<'static, 'static> { + app(true, |k| USAGES[k].long) +} + +/// Build a clap application parameterized by usage strings. +/// +/// The function given should take a clap argument name and return a help +/// string. `app` will panic if a usage string is not defined. +/// +/// This is an intentionally stand-alone module so that it can be used easily +/// in a `build.rs` script to build shell completion files. +fn app(next_line_help: bool, doc: F) -> App<'static, 'static> + where F: Fn(&'static str) -> &'static str { + let arg = |name| { + Arg::with_name(name).help(doc(name)).next_line_help(next_line_help) + }; + let flag = |name| arg(name).long(name); + + App::new("ripgrep") + .author(crate_authors!()) + .version(crate_version!()) + .about(ABOUT) + .max_term_width(100) + .setting(AppSettings::UnifiedHelpMessage) + .usage(USAGE) + .template(TEMPLATE) + // Handle help/version manually to make their output formatting + // consistent with short/long views. + .arg(arg("help-short").short("h")) + .arg(flag("help")) + .arg(flag("version").short("V")) + // First, set up primary positional/flag arguments. 
+ .arg(arg("pattern") + .required_unless_one(&[ + "file", "files", "help-short", "help", "regexp", "type-list", + "version", + ])) + .arg(arg("path").multiple(true)) + .arg(flag("regexp").short("e") + .takes_value(true).multiple(true).number_of_values(1) + .value_name("pattern")) + .arg(flag("files") + // This should also conflict with `pattern`, but the first file + // path will actually be in `pattern`. + .conflicts_with_all(&["file", "regexp", "type-list"])) + .arg(flag("type-list") + .conflicts_with_all(&["file", "files", "pattern", "regexp"])) + // Second, set up common flags. + .arg(flag("text").short("a")) + .arg(flag("count").short("c")) + .arg(flag("color") + .value_name("WHEN") + .takes_value(true) + .hide_possible_values(true) + .possible_values(&["never", "always", "auto"])) + .arg(flag("fixed-strings").short("F")) + .arg(flag("glob").short("g") + .takes_value(true).multiple(true).number_of_values(1) + .value_name("GLOB")) + .arg(flag("ignore-case").short("i")) + .arg(flag("line-number").short("n")) + .arg(flag("no-line-number").short("N")) + .arg(flag("quiet").short("q")) + .arg(flag("type").short("t") + .takes_value(true).multiple(true).number_of_values(1) + .value_name("TYPE")) + .arg(flag("type-not").short("T") + .takes_value(true).multiple(true).number_of_values(1) + .value_name("TYPE")) + .arg(flag("unrestricted").short("u") + .multiple(true)) + .arg(flag("invert-match").short("v")) + .arg(flag("word-regexp").short("w")) + // Third, set up less common flags. 
+ .arg(flag("after-context").short("A") + .value_name("NUM").takes_value(true) + .validator(validate_number)) + .arg(flag("before-context").short("B") + .value_name("NUM").takes_value(true) + .validator(validate_number)) + .arg(flag("context").short("C") + .value_name("NUM").takes_value(true) + .validator(validate_number)) + .arg(flag("column")) + .arg(flag("context-separator").value_name("ARG").takes_value(true)) + .arg(flag("debug")) + .arg(flag("file").short("f") + .value_name("FILE").takes_value(true) + .multiple(true).number_of_values(1)) + .arg(flag("files-with-matches").short("l")) + .arg(flag("with-filename").short("H")) + .arg(flag("no-filename")) + .arg(flag("heading")) + .arg(flag("no-heading")) + .arg(flag("hidden")) + .arg(flag("ignore-file") + .value_name("FILE").takes_value(true) + .multiple(true).number_of_values(1)) + .arg(flag("follow").short("L")) + .arg(flag("max-count") + .short("m").value_name("NUM").takes_value(true) + .validator(validate_number)) + .arg(flag("maxdepth") + .value_name("NUM").takes_value(true) + .validator(validate_number)) + .arg(flag("mmap")) + .arg(flag("no-messages")) + .arg(flag("no-mmap")) + .arg(flag("no-ignore")) + .arg(flag("no-ignore-parent")) + .arg(flag("no-ignore-vcs")) + .arg(flag("null")) + .arg(flag("pretty").short("p")) + .arg(flag("replace").short("r").value_name("ARG").takes_value(true)) + .arg(flag("case-sensitive").short("s")) + .arg(flag("smart-case").short("S")) + .arg(flag("threads") + .short("j").value_name("ARG").takes_value(true) + .validator(validate_number)) + .arg(flag("vimgrep")) + .arg(flag("type-add") + .value_name("TYPE").takes_value(true) + .multiple(true).number_of_values(1)) + .arg(flag("type-clear") + .value_name("TYPE").takes_value(true) + .multiple(true).number_of_values(1)) +} + +struct Usage { + short: &'static str, + long: &'static str, +} + +macro_rules! 
doc { + ($map:expr, $name:expr, $short:expr) => { + doc!($map, $name, $short, $short) + }; + ($map:expr, $name:expr, $short:expr, $long:expr) => { + $map.insert($name, Usage { + short: $short, + long: concat!($long, "\n "), + }); + }; +} + +lazy_static! { + static ref USAGES: HashMap<&'static str, Usage> = { + let mut h = HashMap::new(); + doc!(h, "help-short", + "Show short help output.", + "Show short help output. Use --help to show more details."); + doc!(h, "help", + "Show verbose help output.", + "When given, more details about flags are provided."); + doc!(h, "version", + "Prints version information."); + + doc!(h, "pattern", + "A regular expression used for searching.", + "A regular expression used for searching. Multiple patterns \ + may be given. To match a pattern beginning with a -, use [-]."); + doc!(h, "regexp", + "A regular expression used for searching.", + "A regular expression used for searching. Multiple patterns \ + may be given. To match a pattern beginning with a -, use [-]."); + doc!(h, "path", + "A file or directory to search.", + "A file or directory to search. Directories are searched \ + recursively."); + doc!(h, "files", + "Print each file that would be searched.", + "Print each file that would be searched without actually \ + performing the search. This is useful to determine whether a \ + particular file is being searched or not."); + doc!(h, "type-list", + "Show all supported file types.", + "Show all supported file types and their corresponding globs."); + + doc!(h, "text", + "Search binary files as if they were text."); + doc!(h, "count", + "Only show count of matches for each file."); + doc!(h, "color", + "When to use color. [default: auto]", + "When to use color in the output. The possible values are \ + never, always or auto. The default is auto."); + doc!(h, "fixed-strings", + "Treat the pattern as a literal string.", + "Treat the pattern as a literal string instead of a regular \ + expression. 
When this flag is used, special regular expression \ + meta characters such as (){}*+. do not need to be escaped."); + doc!(h, "glob", + "Include or exclude files/directories.", + "Include or exclude files/directories for searching that \ + match the given glob. This always overrides any other \ + ignore logic. Multiple glob flags may be used. Globbing \ + rules match .gitignore globs. Precede a glob with a ! \ + to exclude it."); + doc!(h, "ignore-case", + "Case insensitive search.", + "Case insensitive search. This is overridden by \ + --case-sensitive."); + doc!(h, "line-number", + "Show line numbers.", + "Show line numbers (1-based). This is enabled by default when \ + searching in a tty."); + doc!(h, "no-line-number", + "Suppress line numbers.", + "Suppress line numbers. This is enabled by default when NOT \ + searching in a tty."); + doc!(h, "quiet", + "Do not print anything to stdout.", + "Do not print anything to stdout. If a match is found in a file, \ + stop searching. This is useful when ripgrep is used only for \ + its exit code."); + doc!(h, "type", + "Only search files matching TYPE.", + "Only search files matching TYPE. Multiple type flags may be \ + provided. Use the --type-list flag to list all available \ + types."); + doc!(h, "type-not", + "Do not search files matching TYPE.", + "Do not search files matching TYPE. Multiple type-not flags may \ + be provided. Use the --type-list flag to list all available \ + types."); + doc!(h, "unrestricted", + "Reduce the level of \"smart\" searching.", + "Reduce the level of \"smart\" searching. A single -u \ + won't respect .gitignore (etc.) files. Two -u flags will \ + additionally search hidden files and directories. Three \ + -u flags will additionally search binary files. -uu is \ + roughly equivalent to grep -r and -uuu is roughly \ + equivalent to grep -a -r."); + doc!(h, "invert-match", + "Invert matching.", + "Invert matching. 
Show lines that don't match given patterns."); + doc!(h, "word-regexp", + "Only show matches surrounded by word boundaries.", + "Only show matches surrounded by word boundaries. This is \ + equivalent to putting \\b before and after all of the search \ + patterns."); + + doc!(h, "after-context", + "Show NUM lines after each match."); + doc!(h, "before-context", + "Show NUM lines before each match."); + doc!(h, "context", + "Show NUM lines before and after each match."); + doc!(h, "column", + "Show column numbers", + "Show column numbers (1-based). This only shows the column \ + numbers for the first match on each line. This does not try \ + to account for Unicode. One byte is equal to one column."); + doc!(h, "context-separator", + "Set the context separator string. [default: --]", + "The string used to separate non-contiguous context lines in the \ + output. Escape sequences like \\x7F or \\t may be used. The \ + default value is --."); + doc!(h, "debug", + "Show debug messages.", + "Show debug messages. Please use this when filing a bug report."); + doc!(h, "file", + "Search for patterns from the given file.", + "Search for patterns from the given file, with one pattern per \ + line. When this flag is used or multiple times or in \ + combination with the -e/--regexp flag, then all patterns \ + provided are searched. Empty pattern lines will match all input \ + lines, and the newline is not counted as part of the pattern."); + doc!(h, "files-with-matches", + "Only show the path of each file with at least one match."); + doc!(h, "with-filename", + "Show file name for each match.", + "Prefix each match with the file name that contains it. This is \ + the default when more than one file is searched."); + doc!(h, "no-filename", + "Never show the file name for a match.", + "Never show the file name for a match. 
This is the default when \ + one file is searched."); + doc!(h, "heading", + "Show matches grouped by each file.", + "This shows the file name above clusters of matches from each \ + file. This is the default mode at a tty."); + doc!(h, "no-heading", + "Don't group matches by each file.", + "Don't group matches by each file. This is the default mode \ + when not at a tty."); + doc!(h, "hidden", + "Search hidden files and directories.", + "Search hidden files and directories. By default, hidden files \ + and directories are skipped."); + doc!(h, "ignore-file", + "Specify additional ignore files.", + "Specify additional ignore files for filtering file paths. \ + Ignore files should be in the gitignore format and are matched \ + relative to the current working directory. These ignore files \ + have lower precedence than all other ignore files. When \ + specifying multiple ignore files, earlier files have lower \ + precedence than later files."); + doc!(h, "follow", + "Follow symbolic links."); + doc!(h, "max-count", + "Limit the number of matches.", + "Limit the number of matching lines per file searched to NUM."); + doc!(h, "maxdepth", + "Descend at most NUM directories.", + "Limit the depth of directory traversal to NUM levels beyond \ + the paths given. A value of zero only searches the \ + starting-points themselves.\n\nFor example, \ + 'rg --maxdepth 0 dir/' is a no-op because dir/ will not be \ + descended into. 'rg --maxdepth 1 dir/' will search only the \ + direct children of dir/."); + doc!(h, "mmap", + "Searching using memory maps when possible.", + "Search using memory maps when possible. This is enabled by \ + default when ripgrep thinks it will be faster. Note that memory \ + map searching doesn't currently support all options, so if an \ + incompatible option (e.g., --context) is given with --mmap, \ + then memory maps will not be used."); + doc!(h, "no-messages", + "Suppress all error messages.", + "Suppress all error messages. 
This is equivalent to redirecting \ + stderr to /dev/null."); + doc!(h, "no-mmap", + "Never use memory maps.", + "Never use memory maps, even when they might be faster."); + doc!(h, "no-ignore", + "Don't respect ignore files.", + "Don't respect ignore files (.gitignore, .ignore, etc.). This \ + implies --no-ignore-parent and --no-ignore-vcs."); + doc!(h, "no-ignore-parent", + "Don't respect ignore files in parent directories.", + "Don't respect ignore files (.gitignore, .ignore, etc.) in \ + parent directories."); + doc!(h, "no-ignore-vcs", + "Don't respect VCS ignore files", + "Don't respect version control ignore files (.gitignore, etc.). \ + This implies --no-ignore-parent. Note that .ignore files will \ + continue to be respected."); + doc!(h, "null", + "Print NUL byte after file names", + "Whenever a file name is printed, follow it with a NUL byte. \ + This includes printing file names before matches, and when \ + printing a list of matching files such as with --count, \ + --files-with-matches and --files. This option is useful for use \ + with xargs."); + doc!(h, "pretty", + "Alias for --color always --heading -n."); + doc!(h, "replace", + "Replace matches with string given.", + "Replace every match with the string given when printing \ + results. Neither this flag nor any other flag will modify your \ + files.\n\nCapture group indices (e.g., $5) and names \ + (e.g., $foo) are supported in the replacement string."); + doc!(h, "case-sensitive", + "Search case sensitively.", + "Search case sensitively. This overrides -i/--ignore-case and \ + -S/--smart-case."); + doc!(h, "smart-case", + "Smart case search.", + "Searches case insensitively if the pattern is all lowercase. \ + Search case sensitively otherwise. This is overridden by \ + either -s/--case-sensitive or -i/--ignore-case."); + doc!(h, "threads", + "The approximate number of threads to use.", + "The approximate number of threads to use. 
A value of 0 (which \ + is the default) causes ripgrep to choose the thread count \ + using heuristics."); + doc!(h, "vimgrep", + "Show results in vim compatible format.", + "Show results with every match on its own line, including \ + line numbers and column numbers. With this option, a line with \ + more than one match will be printed more than once."); + + doc!(h, "type-add", + "Add a new glob for a file type.", + "Add a new glob for a particular file type. Only one glob can be \ + added at a time. Multiple --type-add flags can be provided. \ + Unless --type-clear is used, globs are added to any existing \ + globs defined inside of ripgrep.\n\nNote that this MUST be \ + passed to every invocation of ripgrep. Type settings are NOT \ + persisted.\n\nExample: \ + rg --type-add 'foo:*.foo' -tfoo PATTERN."); + doc!(h, "type-clear", + "Clear globs for given file type.", + "Clear the file type globs previously defined for TYPE. This \ + only clears the default tpye definitions that are found inside \ + of ripgrep.\n\nNote that this MUST be passed to every \ + invocation of ripgrep. 
Type settings are NOT persisted."); + + h + }; +} + +fn validate_number(s: String) -> Result<(), String> { + s.parse::<usize>().map(|_|()).map_err(|err| err.to_string()) +} diff --git a/src/args.rs b/src/args.rs index cb858f80..97cf24fe 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,11 +1,13 @@ use std::cmp; use std::env; +use std::ffi::OsStr; use std::fs; use std::io::{self, BufRead}; +use std::ops; use std::path::{Path, PathBuf}; use std::process; -use docopt::{self, Docopt}; +use clap; use env_logger; use grep::{Grep, GrepBuilder}; use log; @@ -18,6 +20,7 @@ use term; use term::WinConsole; use atty; +use app; use ignore::overrides::{Override, OverrideBuilder}; use ignore::types::{FileTypeDef, Types, TypesBuilder}; use ignore; @@ -25,272 +28,12 @@ use out::{Out, ColoredTerminal}; use printer::Printer; #[cfg(windows)] use terminal_win::WindowsBuffer; +use unescape::unescape; use worker::{Worker, WorkerBuilder}; -use Result; - -/// The Docopt usage string. -/// -/// If you've never heard of Docopt before, see: http://docopt.org -/// (TL;DR: The CLI parser is generated from the usage string below.) -const USAGE: &'static str = " -Usage: rg [options] -e PATTERN ... [<path> ...] - rg [options] -f FILE [<path> ...] - rg [options] <pattern> [<path> ...] - rg [options] --files [<path> ...] - rg [options] --type-list - rg [options] --help - rg [options] --version - -ripgrep (rg) recursively searches your current directory for a regex pattern. - -Project home page: https://github.com/BurntSushi/ripgrep - -Common options: - -a, --text Search binary files as if they were text. - -c, --count Only show count of line matches for each file. - --color WHEN Whether to use coloring in match. - Valid values are never, always or auto. - [default: auto] - -e, --regexp PATTERN ... Use PATTERN to search. This option can be - provided multiple times, where all patterns - given are searched. This is also useful when - searching for a pattern that starts with a dash. 
- -F, --fixed-strings Treat the pattern as a literal string instead of - a regular expression. - -g, --glob GLOB ... Include or exclude files for searching that - match the given glob. This always overrides any - other ignore logic. Multiple glob flags may be - used. Globbing rules match .gitignore globs. - Precede a glob with a '!' to exclude it. - -h, --help Show this usage message. - -i, --ignore-case Case insensitive search. - Overridden by --case-sensitive. - -n, --line-number Show line numbers (1-based). This is enabled - by default at a tty. - -N, --no-line-number Suppress line numbers. - -q, --quiet Do not print anything to stdout. If a match is - found in a file, stop searching that file. - -t, --type TYPE ... Only search files matching TYPE. Multiple type - flags may be provided. Use the --type-list flag - to list all available types. - -T, --type-not TYPE ... Do not search files matching TYPE. Multiple - not-type flags may be provided. - -u, --unrestricted ... Reduce the level of 'smart' searching. A - single -u doesn't respect .gitignore (etc.) - files. Two -u flags will search hidden files - and directories. Three -u flags will search - binary files. -uu is equivalent to grep -r, - and -uuu is equivalent to grep -a -r. - -v, --invert-match Invert matching. - -w, --word-regexp Only show matches surrounded by word boundaries. - This is equivalent to putting \\b before and - after the search pattern. - -Less common options: - -A, --after-context NUM - Show NUM lines after each match. - - -B, --before-context NUM - Show NUM lines before each match. - - -C, --context NUM - Show NUM lines before and after each match. - - --column - Show column numbers (1 based) in output. This only shows the column - numbers for the first match on each line. Note that this doesn't try - to account for Unicode. One byte is equal to one column. - - --context-separator ARG - The string to use when separating non-continuous context lines. Escape - sequences may be used. 
[default: --] - - --debug - Show debug messages. - - -f, --file FILE - Search for patterns specified in a file, one per line. Empty pattern - lines will match all input lines, and the newline is not counted as part - of the pattern. - - --files - Print each file that would be searched (but don't search). - - -l, --files-with-matches - Only show path of each file with matches. - - -H, --with-filename - Prefix each match with the file name that contains it. This is the - default when more than one file is searched. - - --no-filename - Never show the filename for a match. This is the default when - one file is searched. - - --heading - Show the file name above clusters of matches from each file. - This is the default mode at a tty. - - --no-heading - Don't show any file name heading. - - --hidden - Search hidden directories and files. (Hidden directories and files are - skipped by default.) - - --ignore-file FILE ... - Specify additional ignore files for filtering file paths. Ignore files - should be in the gitignore format and are matched relative to the - current working directory. These ignore files have lower precedence - than all other ignore file types. When specifying multiple ignore - files, earlier files have lower precedence than later files. - - -L, --follow - Follow symlinks. - - -m, --max-count NUM - Limit the number of matching lines per file searched to NUM. - - --maxdepth NUM - Descend at most NUM directories below the command line arguments. - A value of zero only searches the starting-points themselves. - - --mmap - Search using memory maps when possible. This is enabled by default - when ripgrep thinks it will be faster. (Note that mmap searching - doesn't currently support the various context related options.) - - --no-messages - Suppress all error messages. - - --no-mmap - Never use memory maps, even when they might be faster. - - --no-ignore - Don't respect ignore files (.gitignore, .ignore, etc.) - This implies --no-ignore-parent. 
- - --no-ignore-parent - Don't respect ignore files in parent directories. - - --no-ignore-vcs - Don't respect version control ignore files (e.g., .gitignore). - Note that .ignore files will continue to be respected. - - --null - Whenever a file name is printed, follow it with a NUL byte. - This includes printing filenames before matches, and when printing - a list of matching files such as with --count, --files-with-matches - and --files. - - -p, --pretty - Alias for --color=always --heading -n. - - -r, --replace ARG - Replace every match with the string given when printing search results. - Neither this flag nor any other flag will modify your files. - - Capture group indices (e.g., $5) and names (e.g., $foo) are supported - in the replacement string. - - -s, --case-sensitive - Search case sensitively. This overrides --ignore-case and --smart-case. - - -S, --smart-case - Search case insensitively if the pattern is all lowercase. - Search case sensitively otherwise. This is overridden by - either --case-sensitive or --ignore-case. - - -j, --threads ARG - The number of threads to use. 0 means use the number of logical CPUs - (capped at 6). [default: 0] - - --version - Show the version number of ripgrep and exit. - - --vimgrep - Show results with every match on its own line, including line - numbers and column numbers. (With this option, a line with more - than one match of the regex will be printed more than once.) - -File type management options: - --type-list - Show all supported file types and their associated globs. - - --type-add ARG ... - Add a new glob for a particular file type. Only one glob can be - added at a time. Multiple --type-add flags can be provided. - Unless --type-clear is used, globs are added to any existing globs - inside of ripgrep. Note that this must be passed to every invocation of - rg. Type settings are NOT persisted. - - Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN` - - --type-clear TYPE ... 
- Clear the file type globs previously defined for TYPE. This only clears - the default type definitions that are found inside of ripgrep. Note - that this must be passed to every invocation of rg. -"; - -/// RawArgs are the args as they are parsed from Docopt. They aren't used -/// directly by the rest of ripgrep. -#[derive(Debug, RustcDecodable)] -pub struct RawArgs { - arg_pattern: String, - arg_path: Vec, - flag_after_context: usize, - flag_before_context: usize, - flag_case_sensitive: bool, - flag_color: String, - flag_column: bool, - flag_context: usize, - flag_context_separator: String, - flag_count: bool, - flag_files_with_matches: bool, - flag_debug: bool, - flag_file: Option, - flag_files: bool, - flag_follow: bool, - flag_glob: Vec, - flag_heading: bool, - flag_hidden: bool, - flag_ignore_case: bool, - flag_ignore_file: Vec, - flag_invert_match: bool, - flag_line_number: bool, - flag_fixed_strings: bool, - flag_max_count: Option, - flag_maxdepth: Option, - flag_mmap: bool, - flag_no_heading: bool, - flag_no_ignore: bool, - flag_no_ignore_parent: bool, - flag_no_ignore_vcs: bool, - flag_no_line_number: bool, - flag_no_messages: bool, - flag_no_mmap: bool, - flag_no_filename: bool, - flag_null: bool, - flag_pretty: bool, - flag_quiet: bool, - flag_regexp: Vec, - flag_replace: Option, - flag_smart_case: bool, - flag_text: bool, - flag_threads: usize, - flag_type: Vec, - flag_type_not: Vec, - flag_type_list: bool, - flag_type_add: Vec, - flag_type_clear: Vec, - flag_unrestricted: u32, - flag_vimgrep: bool, - flag_with_filename: bool, - flag_word_regexp: bool, -} +use {Result, version}; -/// Args are transformed/normalized from RawArgs. +/// Args are transformed/normalized from ArgMatches. 
#[derive(Debug)] pub struct Args { paths: Vec, @@ -308,7 +51,6 @@ pub struct Args { grep: Grep, heading: bool, hidden: bool, - ignore_case: bool, ignore_files: Vec, invert_match: bool, line_number: bool, @@ -330,222 +72,6 @@ pub struct Args { with_filename: bool, } -impl RawArgs { - /// Convert arguments parsed into a configuration used by ripgrep. - fn to_args(&self) -> Result { - let paths = - if self.arg_path.is_empty() { - if atty::on_stdin() - || self.flag_files - || self.flag_type_list - || !atty::stdin_is_readable() { - vec![Path::new("./").to_path_buf()] - } else { - vec![Path::new("-").to_path_buf()] - } - } else { - self.arg_path.iter().map(|p| { - Path::new(p).to_path_buf() - }).collect() - }; - let (after_context, before_context) = - if self.flag_context > 0 { - (self.flag_context, self.flag_context) - } else { - (self.flag_after_context, self.flag_before_context) - }; - let mmap = - if before_context > 0 || after_context > 0 || self.flag_no_mmap { - false - } else if self.flag_mmap { - true - } else if cfg!(windows) { - // On Windows, memory maps appear faster than read calls. Neat. - true - } else if cfg!(target_os = "macos") { - // On Mac, memory maps appear to suck. Neat. - false - } else { - // If we're only searching a few paths and all of them are - // files, then memory maps are probably faster. 
- paths.len() <= 10 && paths.iter().all(|p| p.is_file()) - }; - if mmap { - debug!("will try to use memory maps"); - } - let glob_overrides = - if self.flag_glob.is_empty() { - Override::empty() - } else { - let mut ovr = OverrideBuilder::new(try!(env::current_dir())); - for pat in &self.flag_glob { - try!(ovr.add(pat)); - } - try!(ovr.build()) - }; - let threads = - if self.flag_threads == 0 { - cmp::min(12, num_cpus::get()) - } else { - self.flag_threads - }; - let color = - if self.flag_color == "always" { - true - } else if self.flag_vimgrep { - false - } else if self.flag_color == "auto" { - atty::on_stdout() || self.flag_pretty - } else { - false - }; - - let mut with_filename = self.flag_with_filename; - if !with_filename { - with_filename = paths.len() > 1 || paths[0].is_dir(); - } - with_filename = with_filename && !self.flag_no_filename; - - let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1; - let hidden = self.flag_hidden || self.flag_unrestricted >= 2; - let text = self.flag_text || self.flag_unrestricted >= 3; - let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| { - Path::new(p).to_path_buf() - }).collect(); - let mut args = Args { - paths: paths, - after_context: after_context, - before_context: before_context, - color: color, - column: self.flag_column, - context_separator: unescape(&self.flag_context_separator), - count: self.flag_count, - files_with_matches: self.flag_files_with_matches, - eol: self.eol(), - files: self.flag_files, - follow: self.flag_follow, - glob_overrides: glob_overrides, - grep: try!(self.grep()), - heading: !self.flag_no_heading && self.flag_heading, - hidden: hidden, - ignore_case: self.flag_ignore_case, - ignore_files: ignore_files, - invert_match: self.flag_invert_match, - line_number: !self.flag_no_line_number && self.flag_line_number, - line_per_match: self.flag_vimgrep, - max_count: self.flag_max_count.map(|max| max as u64), - maxdepth: self.flag_maxdepth, - mmap: mmap, - no_ignore: 
no_ignore, - no_ignore_parent: - // --no-ignore implies --no-ignore-parent - self.flag_no_ignore_parent || no_ignore, - no_ignore_vcs: - // --no-ignore implies --no-ignore-vcs - self.flag_no_ignore_vcs || no_ignore, - no_messages: self.flag_no_messages, - null: self.flag_null, - quiet: self.flag_quiet, - replace: self.flag_replace.clone().map(|s| s.into_bytes()), - text: text, - threads: threads, - type_list: self.flag_type_list, - types: try!(self.types()), - with_filename: with_filename, - }; - // If stdout is a tty, then apply some special default options. - if atty::on_stdout() || self.flag_pretty { - if !self.flag_no_line_number && !args.count { - args.line_number = true; - } - if !self.flag_no_heading { - args.heading = true; - } - } - if self.flag_vimgrep { - args.column = true; - args.line_number = true; - } - Ok(args) - } - - fn types(&self) -> Result { - let mut btypes = TypesBuilder::new(); - btypes.add_defaults(); - for ty in &self.flag_type_clear { - btypes.clear(ty); - } - for def in &self.flag_type_add { - try!(btypes.add_def(def)); - } - for ty in &self.flag_type { - btypes.select(ty); - } - for ty in &self.flag_type_not { - btypes.negate(ty); - } - btypes.build().map_err(From::from) - } - - fn pattern(&self) -> Result { - let patterns: Vec = if !self.flag_regexp.is_empty() { - self.flag_regexp.iter().cloned().collect() - } else if let Some(ref file) = self.flag_file { - if file == "-" { - // We need two local variables here to get the lock - // lifetimes correct. 
- let stdin = io::stdin(); - let result = stdin.lock().lines().collect(); - try!(result) - } else { - let f = try!(fs::File::open(&Path::new(file))); - try!(io::BufReader::new(f).lines().collect()) - } - } else { - vec![self.arg_pattern.clone()] - }; - - if self.flag_fixed_strings { - Ok(patterns.into_iter().map(|p| { - self.word_pattern(regex::quote(&p)) - }).collect::>().join("|")) - } else { - Ok(patterns.into_iter().map(|p| { - self.word_pattern(p) - }).collect::>().join("|")) - } - } - - fn word_pattern(&self, s: String) -> String { - if self.flag_word_regexp { - format!(r"\b{}\b", s) - } else { - s - } - } - - fn eol(&self) -> u8 { - // We might want to make this configurable. - b'\n' - } - - fn grep(&self) -> Result { - let smart = - self.flag_smart_case - && !self.flag_ignore_case - && !self.flag_case_sensitive; - let casei = - self.flag_ignore_case - && !self.flag_case_sensitive; - GrepBuilder::new(&try!(self.pattern())) - .case_smart(smart) - .case_insensitive(casei) - .line_terminator(self.eol()) - .build() - .map_err(From::from) - } -} - impl Args { /// Parse the command line arguments for this process. /// @@ -555,34 +81,24 @@ impl Args { /// /// Also, initialize a global logger. pub fn parse() -> Result { - // Get all of the arguments, being careful to require valid UTF-8. - let mut argv = vec![]; - for arg in env::args_os() { - match arg.into_string() { - Ok(s) => argv.push(s), - Err(s) => { - errored!("Argument '{}' is not valid UTF-8. 
\ - Use hex escape sequences to match arbitrary \ - bytes in a pattern (e.g., \\xFF).", - s.to_string_lossy()); - } - } + let matches = app::app_short().get_matches(); + if matches.is_present("help-short") { + let _ = ::app::app_short().print_help(); + let _ = println!(""); + process::exit(0); + } + if matches.is_present("help") { + let _ = ::app::app_long().print_help(); + let _ = println!(""); + process::exit(0); + } + if matches.is_present("version") { + println!("ripgrep {}", crate_version!()); + process::exit(0); } - let mut raw: RawArgs = - Docopt::new(USAGE) - .and_then(|d| d.argv(argv).version(Some(version())).decode()) - .unwrap_or_else(|e| { - match e { - docopt::Error::Version(ref v) => { - println!("ripgrep {}", v); - process::exit(0); - } - e => e.exit(), - } - }); let mut logb = env_logger::LogBuilder::new(); - if raw.flag_debug { + if matches.is_present("debug") { logb.filter(None, log::LogLevelFilter::Debug); } else { logb.filter(None, log::LogLevelFilter::Warn); @@ -590,15 +106,7 @@ impl Args { if let Err(err) = logb.init() { errored!("failed to initialize logger: {}", err); } - - // *sigh*... If --files is given, then the first path ends up in - // pattern. - if raw.flag_files { - if !raw.arg_pattern.is_empty() { - raw.arg_path.insert(0, raw.arg_pattern.clone()); - } - } - raw.to_args().map_err(From::from) + ArgMatches(matches).to_args() } /// Returns true if ripgrep should print the files it will search and exit @@ -780,140 +288,436 @@ impl Args { } } -fn version() -> String { - let (maj, min, pat) = ( - option_env!("CARGO_PKG_VERSION_MAJOR"), - option_env!("CARGO_PKG_VERSION_MINOR"), - option_env!("CARGO_PKG_VERSION_PATCH"), - ); - match (maj, min, pat) { - (Some(maj), Some(min), Some(pat)) => - format!("{}.{}.{}", maj, min, pat), - _ => "".to_owned(), - } -} +/// ArgMatches wraps clap::ArgMatches and provides semantic meaning to several +/// options/flags. 
+struct ArgMatches<'a>(clap::ArgMatches<'a>); -/// A single state in the state machine used by `unescape`. -#[derive(Clone, Copy, Eq, PartialEq)] -enum State { - Escape, - HexFirst, - HexSecond(char), - Literal, +impl<'a> ops::Deref for ArgMatches<'a> { + type Target = clap::ArgMatches<'a>; + fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 } } -/// Unescapes a string given on the command line. It supports a limited set of -/// escape sequences: -/// -/// * \t, \r and \n are mapped to their corresponding ASCII bytes. -/// * \xZZ hexadecimal escapes are mapped to their byte. -fn unescape(s: &str) -> Vec { - use self::State::*; - - let mut bytes = vec![]; - let mut state = Literal; - for c in s.chars() { - match state { - Escape => { - match c { - 'n' => { bytes.push(b'\n'); state = Literal; } - 'r' => { bytes.push(b'\r'); state = Literal; } - 't' => { bytes.push(b'\t'); state = Literal; } - 'x' => { state = HexFirst; } - c => { - bytes.extend(&format!(r"\{}", c).into_bytes()); - state = Literal; +impl<'a> ArgMatches<'a> { + /// Convert the result of parsing CLI arguments into ripgrep's + /// configuration. 
+ fn to_args(&self) -> Result { + let paths = self.paths(); + let mmap = try!(self.mmap(&paths)); + let with_filename = self.with_filename(&paths); + let (before_context, after_context) = try!(self.contexts()); + let args = Args { + paths: paths, + after_context: after_context, + before_context: before_context, + color: self.color(), + column: self.column(), + context_separator: self.context_separator(), + count: self.is_present("count"), + files_with_matches: self.is_present("files-with-matches"), + eol: b'\n', + files: self.is_present("files"), + follow: self.is_present("follow"), + glob_overrides: try!(self.overrides()), + grep: try!(self.grep()), + heading: self.heading(), + hidden: self.hidden(), + ignore_files: self.ignore_files(), + invert_match: self.is_present("invert-match"), + line_number: self.line_number(), + line_per_match: self.is_present("vimgrep"), + max_count: try!(self.usize_of("max-count")).map(|max| max as u64), + maxdepth: try!(self.usize_of("maxdepth")), + mmap: mmap, + no_ignore: self.no_ignore(), + no_ignore_parent: self.no_ignore_parent(), + no_ignore_vcs: self.no_ignore_vcs(), + no_messages: self.is_present("no-messages"), + null: self.is_present("null"), + quiet: self.is_present("quiet"), + replace: self.replace(), + text: self.text(), + threads: try!(self.threads()), + type_list: self.is_present("type-list"), + types: try!(self.types()), + with_filename: with_filename, + }; + if args.mmap { + debug!("will try to use memory maps"); + } + Ok(args) + } + + /// Return all file paths that ripgrep should search. + fn paths(&self) -> Vec { + let mut paths: Vec = match self.values_of_os("path") { + None => vec![], + Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), + }; + // If --file, --files or --regexp is given, then the first path is + // always in `pattern`. 
+ if self.is_present("file") + || self.is_present("files") + || self.is_present("regexp") { + if let Some(path) = self.value_of_os("pattern") { + paths.insert(0, Path::new(path).to_path_buf()); + } + } + if paths.is_empty() { + paths.push(self.default_path()); + } + paths + } + + /// Return the default path that ripgrep should search. + fn default_path(&self) -> PathBuf { + let search_cwd = atty::on_stdin() + || self.is_present("files") + || self.is_present("type-list") + || !atty::stdin_is_readable(); + if search_cwd { + Path::new("./").to_path_buf() + } else { + Path::new("-").to_path_buf() + } + } + + /// Return all of the ignore files given on the command line. + fn ignore_files(&self) -> Vec { + match self.values_of_os("ignore-file") { + None => return vec![], + Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), + } + } + + /// Return the pattern that should be used for searching. + /// + /// If multiple -e/--regexp flags are given, then they are all collapsed + /// into one pattern. + /// + /// If any part of the pattern isn't valid UTF-8, then an error is + /// returne