summaryrefslogtreecommitdiffstats
path: root/src/args.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/args.rs')
-rw-r--r--src/args.rs1038
1 files changed, 421 insertions, 617 deletions
diff --git a/src/args.rs b/src/args.rs
index cb858f80..97cf24fe 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,11 +1,13 @@
use std::cmp;
use std::env;
+use std::ffi::OsStr;
use std::fs;
use std::io::{self, BufRead};
+use std::ops;
use std::path::{Path, PathBuf};
use std::process;
-use docopt::{self, Docopt};
+use clap;
use env_logger;
use grep::{Grep, GrepBuilder};
use log;
@@ -18,6 +20,7 @@ use term;
use term::WinConsole;
use atty;
+use app;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::types::{FileTypeDef, Types, TypesBuilder};
use ignore;
@@ -25,272 +28,12 @@ use out::{Out, ColoredTerminal};
use printer::Printer;
#[cfg(windows)]
use terminal_win::WindowsBuffer;
+use unescape::unescape;
use worker::{Worker, WorkerBuilder};
-use Result;
-
-/// The Docopt usage string.
-///
-/// If you've never heard of Docopt before, see: http://docopt.org
-/// (TL;DR: The CLI parser is generated from the usage string below.)
-const USAGE: &'static str = "
-Usage: rg [options] -e PATTERN ... [<path> ...]
- rg [options] -f FILE [<path> ...]
- rg [options] <pattern> [<path> ...]
- rg [options] --files [<path> ...]
- rg [options] --type-list
- rg [options] --help
- rg [options] --version
-
-ripgrep (rg) recursively searches your current directory for a regex pattern.
-
-Project home page: https://github.com/BurntSushi/ripgrep
-
-Common options:
- -a, --text Search binary files as if they were text.
- -c, --count Only show count of line matches for each file.
- --color WHEN Whether to use coloring in match.
- Valid values are never, always or auto.
- [default: auto]
- -e, --regexp PATTERN ... Use PATTERN to search. This option can be
- provided multiple times, where all patterns
- given are searched. This is also useful when
- searching for a pattern that starts with a dash.
- -F, --fixed-strings Treat the pattern as a literal string instead of
- a regular expression.
- -g, --glob GLOB ... Include or exclude files for searching that
- match the given glob. This always overrides any
- other ignore logic. Multiple glob flags may be
- used. Globbing rules match .gitignore globs.
- Precede a glob with a '!' to exclude it.
- -h, --help Show this usage message.
- -i, --ignore-case Case insensitive search.
- Overridden by --case-sensitive.
- -n, --line-number Show line numbers (1-based). This is enabled
- by default at a tty.
- -N, --no-line-number Suppress line numbers.
- -q, --quiet Do not print anything to stdout. If a match is
- found in a file, stop searching that file.
- -t, --type TYPE ... Only search files matching TYPE. Multiple type
- flags may be provided. Use the --type-list flag
- to list all available types.
- -T, --type-not TYPE ... Do not search files matching TYPE. Multiple
- not-type flags may be provided.
- -u, --unrestricted ... Reduce the level of 'smart' searching. A
- single -u doesn't respect .gitignore (etc.)
- files. Two -u flags will search hidden files
- and directories. Three -u flags will search
- binary files. -uu is equivalent to grep -r,
- and -uuu is equivalent to grep -a -r.
- -v, --invert-match Invert matching.
- -w, --word-regexp Only show matches surrounded by word boundaries.
- This is equivalent to putting \\b before and
- after the search pattern.
-
-Less common options:
- -A, --after-context NUM
- Show NUM lines after each match.
-
- -B, --before-context NUM
- Show NUM lines before each match.
-
- -C, --context NUM
- Show NUM lines before and after each match.
-
- --column
- Show column numbers (1 based) in output. This only shows the column
- numbers for the first match on each line. Note that this doesn't try
- to account for Unicode. One byte is equal to one column.
-
- --context-separator ARG
- The string to use when separating non-continuous context lines. Escape
- sequences may be used. [default: --]
-
- --debug
- Show debug messages.
-
- -f, --file FILE
- Search for patterns specified in a file, one per line. Empty pattern
- lines will match all input lines, and the newline is not counted as part
- of the pattern.
-
- --files
- Print each file that would be searched (but don't search).
-
- -l, --files-with-matches
- Only show path of each file with matches.
-
- -H, --with-filename
- Prefix each match with the file name that contains it. This is the
- default when more than one file is searched.
-
- --no-filename
- Never show the filename for a match. This is the default when
- one file is searched.
-
- --heading
- Show the file name above clusters of matches from each file.
- This is the default mode at a tty.
-
- --no-heading
- Don't show any file name heading.
-
- --hidden
- Search hidden directories and files. (Hidden directories and files are
- skipped by default.)
-
- --ignore-file FILE ...
- Specify additional ignore files for filtering file paths. Ignore files
- should be in the gitignore format and are matched relative to the
- current working directory. These ignore files have lower precedence
- than all other ignore file types. When specifying multiple ignore
- files, earlier files have lower precedence than later files.
-
- -L, --follow
- Follow symlinks.
-
- -m, --max-count NUM
- Limit the number of matching lines per file searched to NUM.
-
- --maxdepth NUM
- Descend at most NUM directories below the command line arguments.
- A value of zero only searches the starting-points themselves.
-
- --mmap
- Search using memory maps when possible. This is enabled by default
- when ripgrep thinks it will be faster. (Note that mmap searching
- doesn't currently support the various context related options.)
-
- --no-messages
- Suppress all error messages.
-
- --no-mmap
- Never use memory maps, even when they might be faster.
-
- --no-ignore
- Don't respect ignore files (.gitignore, .ignore, etc.)
- This implies --no-ignore-parent.
-
- --no-ignore-parent
- Don't respect ignore files in parent directories.
-
- --no-ignore-vcs
- Don't respect version control ignore files (e.g., .gitignore).
- Note that .ignore files will continue to be respected.
-
- --null
- Whenever a file name is printed, follow it with a NUL byte.
- This includes printing filenames before matches, and when printing
- a list of matching files such as with --count, --files-with-matches
- and --files.
-
- -p, --pretty
- Alias for --color=always --heading -n.
-
- -r, --replace ARG
- Replace every match with the string given when printing search results.
- Neither this flag nor any other flag will modify your files.
-
- Capture group indices (e.g., $5) and names (e.g., $foo) are supported
- in the replacement string.
-
- -s, --case-sensitive
- Search case sensitively. This overrides --ignore-case and --smart-case.
-
- -S, --smart-case
- Search case insensitively if the pattern is all lowercase.
- Search case sensitively otherwise. This is overridden by
- either --case-sensitive or --ignore-case.
-
- -j, --threads ARG
- The number of threads to use. 0 means use the number of logical CPUs
- (capped at 6). [default: 0]
-
- --version
- Show the version number of ripgrep and exit.
-
- --vimgrep
- Show results with every match on its own line, including line
- numbers and column numbers. (With this option, a line with more
- than one match of the regex will be printed more than once.)
-
-File type management options:
- --type-list
- Show all supported file types and their associated globs.
-
- --type-add ARG ...
- Add a new glob for a particular file type. Only one glob can be
- added at a time. Multiple --type-add flags can be provided.
- Unless --type-clear is used, globs are added to any existing globs
- inside of ripgrep. Note that this must be passed to every invocation of
- rg. Type settings are NOT persisted.
-
- Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN`
-
- --type-clear TYPE ...
- Clear the file type globs previously defined for TYPE. This only clears
- the default type definitions that are found inside of ripgrep. Note
- that this must be passed to every invocation of rg.
-";
-
-/// RawArgs are the args as they are parsed from Docopt. They aren't used
-/// directly by the rest of ripgrep.
-#[derive(Debug, RustcDecodable)]
-pub struct RawArgs {
- arg_pattern: String,
- arg_path: Vec<String>,
- flag_after_context: usize,
- flag_before_context: usize,
- flag_case_sensitive: bool,
- flag_color: String,
- flag_column: bool,
- flag_context: usize,
- flag_context_separator: String,
- flag_count: bool,
- flag_files_with_matches: bool,
- flag_debug: bool,
- flag_file: Option<String>,
- flag_files: bool,
- flag_follow: bool,
- flag_glob: Vec<String>,
- flag_heading: bool,
- flag_hidden: bool,
- flag_ignore_case: bool,
- flag_ignore_file: Vec<String>,
- flag_invert_match: bool,
- flag_line_number: bool,
- flag_fixed_strings: bool,
- flag_max_count: Option<usize>,
- flag_maxdepth: Option<usize>,
- flag_mmap: bool,
- flag_no_heading: bool,
- flag_no_ignore: bool,
- flag_no_ignore_parent: bool,
- flag_no_ignore_vcs: bool,
- flag_no_line_number: bool,
- flag_no_messages: bool,
- flag_no_mmap: bool,
- flag_no_filename: bool,
- flag_null: bool,
- flag_pretty: bool,
- flag_quiet: bool,
- flag_regexp: Vec<String>,
- flag_replace: Option<String>,
- flag_smart_case: bool,
- flag_text: bool,
- flag_threads: usize,
- flag_type: Vec<String>,
- flag_type_not: Vec<String>,
- flag_type_list: bool,
- flag_type_add: Vec<String>,
- flag_type_clear: Vec<String>,
- flag_unrestricted: u32,
- flag_vimgrep: bool,
- flag_with_filename: bool,
- flag_word_regexp: bool,
-}
+use {Result, version};
-/// Args are transformed/normalized from RawArgs.
+/// Args are transformed/normalized from ArgMatches.
#[derive(Debug)]
pub struct Args {
paths: Vec<PathBuf>,
@@ -308,7 +51,6 @@ pub struct Args {
grep: Grep,
heading: bool,
hidden: bool,
- ignore_case: bool,
ignore_files: Vec<PathBuf>,
invert_match: bool,
line_number: bool,
@@ -330,222 +72,6 @@ pub struct Args {
with_filename: bool,
}
-impl RawArgs {
- /// Convert arguments parsed into a configuration used by ripgrep.
- fn to_args(&self) -> Result<Args> {
- let paths =
- if self.arg_path.is_empty() {
- if atty::on_stdin()
- || self.flag_files
- || self.flag_type_list
- || !atty::stdin_is_readable() {
- vec![Path::new("./").to_path_buf()]
- } else {
- vec![Path::new("-").to_path_buf()]
- }
- } else {
- self.arg_path.iter().map(|p| {
- Path::new(p).to_path_buf()
- }).collect()
- };
- let (after_context, before_context) =
- if self.flag_context > 0 {
- (self.flag_context, self.flag_context)
- } else {
- (self.flag_after_context, self.flag_before_context)
- };
- let mmap =
- if before_context > 0 || after_context > 0 || self.flag_no_mmap {
- false
- } else if self.flag_mmap {
- true
- } else if cfg!(windows) {
- // On Windows, memory maps appear faster than read calls. Neat.
- true
- } else if cfg!(target_os = "macos") {
- // On Mac, memory maps appear to suck. Neat.
- false
- } else {
- // If we're only searching a few paths and all of them are
- // files, then memory maps are probably faster.
- paths.len() <= 10 && paths.iter().all(|p| p.is_file())
- };
- if mmap {
- debug!("will try to use memory maps");
- }
- let glob_overrides =
- if self.flag_glob.is_empty() {
- Override::empty()
- } else {
- let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
- for pat in &self.flag_glob {
- try!(ovr.add(pat));
- }
- try!(ovr.build())
- };
- let threads =
- if self.flag_threads == 0 {
- cmp::min(12, num_cpus::get())
- } else {
- self.flag_threads
- };
- let color =
- if self.flag_color == "always" {
- true
- } else if self.flag_vimgrep {
- false
- } else if self.flag_color == "auto" {
- atty::on_stdout() || self.flag_pretty
- } else {
- false
- };
-
- let mut with_filename = self.flag_with_filename;
- if !with_filename {
- with_filename = paths.len() > 1 || paths[0].is_dir();
- }
- with_filename = with_filename && !self.flag_no_filename;
-
- let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
- let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
- let text = self.flag_text || self.flag_unrestricted >= 3;
- let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
- Path::new(p).to_path_buf()
- }).collect();
- let mut args = Args {
- paths: paths,
- after_context: after_context,
- before_context: before_context,
- color: color,
- column: self.flag_column,
- context_separator: unescape(&self.flag_context_separator),
- count: self.flag_count,
- files_with_matches: self.flag_files_with_matches,
- eol: self.eol(),
- files: self.flag_files,
- follow: self.flag_follow,
- glob_overrides: glob_overrides,
- grep: try!(self.grep()),
- heading: !self.flag_no_heading && self.flag_heading,
- hidden: hidden,
- ignore_case: self.flag_ignore_case,
- ignore_files: ignore_files,
- invert_match: self.flag_invert_match,
- line_number: !self.flag_no_line_number && self.flag_line_number,
- line_per_match: self.flag_vimgrep,
- max_count: self.flag_max_count.map(|max| max as u64),
- maxdepth: self.flag_maxdepth,
- mmap: mmap,
- no_ignore: no_ignore,
- no_ignore_parent:
- // --no-ignore implies --no-ignore-parent
- self.flag_no_ignore_parent || no_ignore,
- no_ignore_vcs:
- // --no-ignore implies --no-ignore-vcs
- self.flag_no_ignore_vcs || no_ignore,
- no_messages: self.flag_no_messages,
- null: self.flag_null,
- quiet: self.flag_quiet,
- replace: self.flag_replace.clone().map(|s| s.into_bytes()),
- text: text,
- threads: threads,
- type_list: self.flag_type_list,
- types: try!(self.types()),
- with_filename: with_filename,
- };
- // If stdout is a tty, then apply some special default options.
- if atty::on_stdout() || self.flag_pretty {
- if !self.flag_no_line_number && !args.count {
- args.line_number = true;
- }
- if !self.flag_no_heading {
- args.heading = true;
- }
- }
- if self.flag_vimgrep {
- args.column = true;
- args.line_number = true;
- }
- Ok(args)
- }
-
- fn types(&self) -> Result<Types> {
- let mut btypes = TypesBuilder::new();
- btypes.add_defaults();
- for ty in &self.flag_type_clear {
- btypes.clear(ty);
- }
- for def in &self.flag_type_add {
- try!(btypes.add_def(def));
- }
- for ty in &self.flag_type {
- btypes.select(ty);
- }
- for ty in &self.flag_type_not {
- btypes.negate(ty);
- }
- btypes.build().map_err(From::from)
- }
-
- fn pattern(&self) -> Result<String> {
- let patterns: Vec<String> = if !self.flag_regexp.is_empty() {
- self.flag_regexp.iter().cloned().collect()
- } else if let Some(ref file) = self.flag_file {
- if file == "-" {
- // We need two local variables here to get the lock
- // lifetimes correct.
- let stdin = io::stdin();
- let result = stdin.lock().lines().collect();
- try!(result)
- } else {
- let f = try!(fs::File::open(&Path::new(file)));
- try!(io::BufReader::new(f).lines().collect())
- }
- } else {
- vec![self.arg_pattern.clone()]
- };
-
- if self.flag_fixed_strings {
- Ok(patterns.into_iter().map(|p| {
- self.word_pattern(regex::quote(&p))
- }).collect::<Vec<String>>().join("|"))
- } else {
- Ok(patterns.into_iter().map(|p| {
- self.word_pattern(p)
- }).collect::<Vec<String>>().join("|"))
- }
- }
-
- fn word_pattern(&self, s: String) -> String {
- if self.flag_word_regexp {
- format!(r"\b{}\b", s)
- } else {
- s
- }
- }
-
- fn eol(&self) -> u8 {
- // We might want to make this configurable.
- b'\n'
- }
-
- fn grep(&self) -> Result<Grep> {
- let smart =
- self.flag_smart_case
- && !self.flag_ignore_case
- && !self.flag_case_sensitive;
- let casei =
- self.flag_ignore_case
- && !self.flag_case_sensitive;
- GrepBuilder::new(&try!(self.pattern()))
- .case_smart(smart)
- .case_insensitive(casei)
- .line_terminator(self.eol())
- .build()
- .map_err(From::from)
- }
-}
-
impl Args {
/// Parse the command line arguments for this process.
///
@@ -555,34 +81,24 @@ impl Args {
///
/// Also, initialize a global logger.
pub fn parse() -> Result<Args> {
- // Get all of the arguments, being careful to require valid UTF-8.
- let mut argv = vec![];
- for arg in env::args_os() {
- match arg.into_string() {
- Ok(s) => argv.push(s),
- Err(s) => {
- errored!("Argument '{}' is not valid UTF-8. \
- Use hex escape sequences to match arbitrary \
- bytes in a pattern (e.g., \\xFF).",
- s.to_string_lossy());
- }
- }
+ let matches = app::app_short().get_matches();
+ if matches.is_present("help-short") {
+ let _ = ::app::app_short().print_help();
+ let _ = println!("");
+ process::exit(0);
+ }
+ if matches.is_present("help") {
+ let _ = ::app::app_long().print_help();
+ let _ = println!("");
+ process::exit(0);
+ }
+ if matches.is_present("version") {
+ println!("ripgrep {}", crate_version!());
+ process::exit(0);
}
- let mut raw: RawArgs =
- Docopt::new(USAGE)
- .and_then(|d| d.argv(argv).version(Some(version())).decode())
- .unwrap_or_else(|e| {
- match e {
- docopt::Error::Version(ref v) => {
- println!("ripgrep {}", v);
- process::exit(0);
- }
- e => e.exit(),
- }
- });
let mut logb = env_logger::LogBuilder::new();
- if raw.flag_debug {
+ if matches.is_present("debug") {
logb.filter(None, log::LogLevelFilter::Debug);
} else {
logb.filter(None, log::LogLevelFilter::Warn);
@@ -590,15 +106,7 @@ impl Args {
if let Err(err) = logb.init() {
errored!("failed to initialize logger: {}", err);
}
-
- // *sigh*... If --files is given, then the first path ends up in
- // pattern.
- if raw.flag_files {
- if !raw.arg_pattern.is_empty() {
- raw.arg_path.insert(0, raw.arg_pattern.clone());
- }
- }
- raw.to_args().map_err(From::from)
+ ArgMatches(matches).to_args()
}
/// Returns true if ripgrep should print the files it will search and exit
@@ -780,140 +288,436 @@ impl Args {
}
}
-fn version() -> String {
- let (maj, min, pat) = (
- option_env!("CARGO_PKG_VERSION_MAJOR"),
- option_env!("CARGO_PKG_VERSION_MINOR"),
- option_env!("CARGO_PKG_VERSION_PATCH"),
- );
- match (maj, min, pat) {
- (Some(maj), Some(min), Some(pat)) =>
- format!("{}.{}.{}", maj, min, pat),
- _ => "".to_owned(),
- }
-}
+/// ArgMatches wraps clap::ArgMatches and provides semantic meaning to several
+/// options/flags.
+struct ArgMatches<'a>(clap::ArgMatches<'a>);
-/// A single state in the state machine used by `unescape`.
-#[derive(Clone, Copy, Eq, PartialEq)]
-enum State {
- Escape,
- HexFirst,
- HexSecond(char),
- Literal,
+impl<'a> ops::Deref for ArgMatches<'a> {
+ type Target = clap::ArgMatches<'a>;
+ fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 }
}
-/// Unescapes a string given on the command line. It supports a limited set of
-/// escape sequences:
-///
-/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
-/// * \xZZ hexadecimal escapes are mapped to their byte.
-fn unescape(s: &str) -> Vec<u8> {
- use self::State::*;
-
- let mut bytes = vec![];
- let mut state = Literal;
- for c in s.chars() {
- match state {
- Escape => {
- match c {
- 'n' => { bytes.push(b'\n'); state = Literal; }
- 'r' => { bytes.push(b'\r'); state = Literal; }
- 't' => { bytes.push(b'\t'); state = Literal; }
- 'x' => { state = HexFirst; }
- c => {
- bytes.extend(&format!(r"\{}", c).into_bytes());
- state = Literal;
+impl<'a> ArgMatches<'a> {
+ /// Convert the result of parsing CLI arguments into ripgrep's
+ /// configuration.
+ fn to_args(&self) -> Result<Args> {
+ let paths = self.paths();
+ let mmap = try!(self.mmap(&paths));
+ let with_filename = self.with_filename(&paths);
+ let (before_context, after_context) = try!(self.contexts());
+ let args = Args {
+ paths: paths,
+ after_context: after_context,
+ before_context: before_context,
+ color: self.color(),
+ column: self.column(),
+ context_separator: self.context_separator(),
+ count: self.is_present("count"),
+ files_with_matches: self.is_present("files-with-matches"),
+ eol: b'\n',
+ files: self.is_present("files"),
+ follow: self.is_present("follow"),
+ glob_overrides: try!(self.overrides()),
+ grep: try!(self.grep()),
+ heading: self.heading(),
+ hidden: self.hidden(),
+ ignore_files: self.ignore_files(),
+ invert_match: self.is_present("invert-match"),
+ line_number: self.line_number(),
+ line_per_match: self.is_present("vimgrep"),
+ max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
+ maxdepth: try!(self.usize_of("maxdepth")),
+ mmap: mmap,
+ no_ignore: self.no_ignore(),
+ no_ignore_parent: self.no_ignore_parent(),
+ no_ignore_vcs: self.no_ignore_vcs(),
+ no_messages: self.is_present("no-messages"),
+ null: self.is_present("null"),
+ quiet: self.is_present("quiet"),
+ replace: self.replace(),
+ text: self.text(),
+ threads: try!(self.threads()),
+ type_list: self.is_present("type-list"),
+ types: try!(self.types()),
+ with_filename: with_filename,
+ };
+ if args.mmap {
+ debug!("will try to use memory maps");
+ }
+ Ok(args)
+ }
+
+ /// Return all file paths that ripgrep should search.
+ fn paths(&self) -> Vec<PathBuf> {
+ let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
+ None => vec![],
+ Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
+ };
+ // If --file, --files or --regexp is given, then the first path is
+ // always in `pattern`.
+ if self.is_present("file")
+ || self.is_present("files")
+ || self.is_present("regexp") {
+ if let Some(path) = self.value_of_os("pattern") {
+ paths.insert(0, Path::new(path).to_path_buf());
+ }
+ }
+ if paths.is_empty() {
+ paths.push(self.default_path());
+ }
+ paths
+ }
+
+ /// Return the default path that ripgrep should search.
+ fn default_path(&self) -> PathBuf {
+ let search_cwd = atty::on_stdin()
+ || self.is_present("files")
+ || self.is_present("type-list")
+ || !atty::stdin_is_readable();
+ if search_cwd {
+ Path::new("./").to_path_buf()
+ } else {
+ Path::new("-").to_path_buf()
+ }
+ }
+
+ /// Return all of the ignore files given on the command line.
+ fn ignore_files(&self) -> Vec<PathBuf> {
+ match self.values_of_os("ignore-file") {
+ None => return vec![],
+ Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
+ }
+ }
+
+ /// Return the pattern that should be used for searching.
+ ///
+ /// If multiple -e/--regexp flags are given, then they are all collapsed
+ /// into one pattern.
+ ///
+ /// If any part of the pattern isn't valid UTF-8, then an error is
+ /// returned.
+ fn pattern(&self) -> Result<String> {
+ Ok(try!(self.patterns()).join("|"))
+ }
+
+ /// Get a sequence of all available patterns from the command line.
+ /// This includes reading the -e/--regexp and -f/--file flags.
+ ///
+ /// Note that if -F/--fixed-strings is set, then all patterns will be
+ /// escaped. Similarly, if -w/--word-regexp is set, then all patterns
+ /// are surrounded by `\b`.
+ ///
+ /// If any pattern is invalid UTF-8, then an error is returned.
+ fn patterns(&self) -> Result<Vec<String>> {
+ let mut pats = vec![];
+ match self.values_of_os("regexp") {
+ None => {
+ if self.values_of_os("file").is_none() {
+ if let Some(os_pat) = self.value_of_os("pattern") {
+ pats.push(try!(self.os_str_pattern(os_pat)));
}
}
}
- HexFirst => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- state = HexSecond(c);
- }
- c => {
- bytes.extend(&format!(r"\x{}", c).into_bytes());
- state = Literal;
- }
+ Some(os_pats) => {
+ for os_pat in os_pats {
+ pats.push(try!(self.os_str_pattern(os_pat)));
}
}
- HexSecond(first) => {
- match c {
- '0'...'9' | 'A'...'F' | 'a'...'f' => {
- let ordinal = format!("{}{}", first, c);
- let byte = u8::from_str_radix(&ordinal, 16).unwrap();
- bytes.push(byte);
- state = Literal;
+ }
+ if let Some(files) = self.values_of_os("file") {
+ for file in files {
+ if file == "-" {
+ let stdin = io::stdin();
+ for line in stdin.lock().lines() {
+ pats.push(self.str_pattern(&try!(line)));
}
- c => {
- let original = format!(r"\x{}{}", first, c);
- bytes.extend(&original.into_bytes());
- state = Literal;
+ } else {
+ let f = try!(fs::File::open(file));
+ for line in io::BufReader::new(f).lines() {
+ pats.push(self.str_pattern(&try!(line)));
}
}
}
- Literal => {
- match c {
- '\\' => { state = Escape; }
- c => { bytes.extend(c.to_string().as_bytes()); }
- }
- }
}
+ if pats.is_empty() {
+ pats.push(self.empty_pattern())
+ }
+ Ok(pats)
}
- match state {
- Escape => bytes.push(b'\\'),
- HexFirst => bytes.extend(b"\\x"),
- HexSecond(c) => bytes.extend(&format!("\\x{}", c).into_bytes()),
- Literal => {}
+
+ /// Converts an OsStr pattern to a String pattern, including word
+ /// boundaries or escapes if applicable.
+ ///
+ /// If the pattern is not valid UTF-8, then an error is returned.
+ fn os_str_pattern(&self, pat: &OsStr) -> Result<String> {
+ let s = try!(pattern_to_str(pat));
+ Ok(self.str_pattern(s))
+ }
+
+ /// Converts a &str pattern to a String pattern, including word
+ /// boundaries or escapes if applicable.
+ fn str_pattern(&self, pat: &str) -> String {
+ let s = self.word_pattern(self.literal_pattern(pat.to_string()));
+ if s.is_empty() {
+ self.empty_pattern()
+ } else {
+ s
+ }
}
- bytes
-}
-#[cfg(test)]
-mod tests {
- use super::unescape;
+ /// Returns the given pattern as a literal pattern if the
+ /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
+ /// unchanged.
+ fn literal_pattern(&self, pat: String) -> String {
+ if self.is_present("fixed-strings") {
+ regex::quote(&pat)
+ } else {
+ pat
+ }
+ }
- fn b(bytes: &'static [u8]) -> Vec<u8> {
- bytes.to_vec()
+ /// Returns the given pattern as a word pattern if the -w/--word-regexp
+ /// flag is set. Otherwise, the pattern is returned unchanged.
+ fn word_pattern(&self, pat: String) -> String {
+ if self.is_present("word-regexp") {
+ format!(r"\b{}\b", pat)
+ } else {
+ pat
+ }
}
- #[test]
- fn unescape_nul() {
- assert_eq!(b(b"\x00"), unescape(r"\x00"));
+ /// Empty pattern returns a pattern that is guaranteed to produce an empty
+ /// regular expression that is valid in any position.
+ fn empty_pattern(&self) -> String {
+ // This would normally just be an empty string, which works on its
+ // own, but if the patterns are joined in a set of alternations, then
+ // you wind up with `foo|`, which is invalid.
+ self.word_pattern("z{0}".to_string())
}
- #[test]
- fn unescape_nl() {
- assert_eq!(b(b"\n"), unescape(r"\n"));
+ /// Returns true if and only if file names containing each match should
+ /// be emitted.
+ ///
+ /// `paths` should be a slice of all top-level file paths that ripgrep
+ /// will need to search.
+ fn with_filename(&self, paths: &[PathBuf]) -> bool {
+ if self.is_present("no-filename") {
+ false
+ } else {
+ self.is_present("with-filename")
+ || paths.len() > 1
+ || paths.get(0).map_or(false, |p| p.is_dir())
+ }
}
- #[test]
- fn unescape_tab() {
- assert_eq!(b(b"\t"), unescape(r"\t"));
+ /// Returns true if and only if memory map searching should be tried.
+ ///
+ /// `paths` should be a slice of all top-level file paths that ripgrep
+ /// will need to search.
+ fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
+ let (before, after) = try!(self.contexts());
+ Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
+ false
+ } else if self.is_present("mmap") {
+ true
+ } else if cfg!(windows) {
+ // On Windows, memory maps appear faster than read calls. Neat.
+ true
+ } else if cfg!(target_os = "macos") {
+ // On Mac, memory maps appear to suck. Neat.
+ false
+ } else {
+ // If we're only searching a few paths and all of them are
+ // files, then memory maps are probably faster.
+ paths.len() <= 10 && paths.iter().all(|p| p.is_file())
+ })
}
- #[test]
- fn unescape_carriage() {
- assert_eq!(b(b"\r"), unescape(r"\r"));
+ /// Returns true if and only if line numbers should be shown.
+ fn line_number(&self) -> bool {
+ if self.is_present("no-line-number") || self.is_present("count") {
+ false
+ } else {
+ self.is_present("line-number")
+ || atty::on_stdout()
+ || self.is_present("pretty")
+ || self.is_present("vimgrep")
+ }
}
- #[test]
- fn unescape_nothing_simple() {
- assert_eq!(b(b"\\a"), unescape(r"\a"));
+ /// Returns true if and only if column numbers should be shown.
+ fn column(&self) -> bool {
+ self.is_present("column") || self.is_present("vimgrep")
}
- #[test]
- fn unescape_nothing_hex0() {
- assert_eq!(b(b"\\x"), unescape(r"\x"));
+ /// Returns true if and only if matches should be grouped with file name
+ /// headings.
+ fn heading(&self) -> bool {
+ if self.is_present("no-heading") {
+ false
+ } else {
+ self.is_present("heading")
+ || atty::on_stdout()
+ || self.is_present("pretty")
+ }
+ }
+
+ /// Returns the replacement string as UTF-8 bytes if it exists.
+ fn replace(&self) -> Option<Vec<u8>> {
+ self.value_of_lossy("replace").map(|s| s.into_owned().into_bytes())
}
- #[test]
- fn unescape_nothing_hex1() {
- assert_eq!(b(b"\\xz"), unescape(r"\xz"));
+ /// Returns the unescaped context separator in UTF-8 bytes.
+ fn context_separator(&self) -> Vec<u8> {
+ match self.value_of_lossy("context-separator") {
+ None => b"--".to_vec(),
+ Some(sep) => unescape(&sep),
+ }
}
- #[test]
- fn unescape_nothing_hex2() {
- assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
+ /// Returns the before and after contexts from the command line.
+ ///
+ /// If a context setting was absent, then `0` is returned.
+ /