summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/app.rs381
-rw-r--r--src/args.rs2044
-rw-r--r--src/config.rs19
-rw-r--r--src/logger.rs25
-rw-r--r--src/main.rs580
-rw-r--r--src/messages.rs50
-rw-r--r--src/path_printer.rs101
-rw-r--r--src/pathutil.rs42
-rw-r--r--src/preprocessor.rs15
-rw-r--r--src/printer.rs928
-rw-r--r--src/search.rs408
-rw-r--r--src/search_buffer.rs424
-rw-r--r--src/search_stream.rs1466
-rw-r--r--src/subject.rs230
-rw-r--r--src/worker.rs413
15 files changed, 2568 insertions, 4558 deletions
diff --git a/src/app.rs b/src/app.rs
index 24851c3b..a0b036d5 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -2,8 +2,8 @@
// including some light validation.
//
// This module is purposely written in a bare-bones way, since it is included
-// in ripgrep's build.rs file as a way to generate completion files for common
-// shells.
+// in ripgrep's build.rs file as a way to generate a man page and completion
+// files for common shells.
//
// The only other place that ripgrep deals with clap is in src/args.rs, which
// is where we read clap's configuration from the end user's arguments and turn
@@ -82,7 +82,34 @@ pub fn app() -> App<'static, 'static> {
/// the RIPGREP_BUILD_GIT_HASH env var is inspect for it. If that isn't set,
/// then a revision hash is not included in the version string returned.
pub fn long_version(revision_hash: Option<&str>) -> String {
- // Let's say whether faster CPU instructions are enabled or not.
+ // Do we have a git hash?
+ // (Yes, if ripgrep was built on a machine with `git` installed.)
+ let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) {
+ None => String::new(),
+ Some(githash) => format!(" (rev {})", githash),
+ };
+ // Put everything together.
+ let runtime = runtime_cpu_features();
+ if runtime.is_empty() {
+ format!(
+ "{}{}\n{} (compiled)",
+ crate_version!(),
+ hash,
+ compile_cpu_features().join(" ")
+ )
+ } else {
+ format!(
+ "{}{}\n{} (compiled)\n{} (runtime)",
+ crate_version!(),
+ hash,
+ compile_cpu_features().join(" "),
+ runtime.join(" ")
+ )
+ }
+}
+
+/// Returns the relevant CPU features enabled at compile time.
+fn compile_cpu_features() -> Vec<&'static str> {
let mut features = vec![];
if cfg!(feature = "simd-accel") {
features.push("+SIMD");
@@ -94,14 +121,33 @@ pub fn long_version(revision_hash: Option<&str>) -> String {
} else {
features.push("-AVX");
}
- // Do we have a git hash?
- // (Yes, if ripgrep was built on a machine with `git` installed.)
- let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) {
- None => String::new(),
- Some(githash) => format!(" (rev {})", githash),
- };
- // Put everything together.
- format!("{}{}\n{}", crate_version!(), hash, features.join(" "))
+ features
+}
+
+/// Returns the relevant CPU features enabled at runtime.
+#[cfg(all(ripgrep_runtime_cpu, target_arch = "x86_64"))]
+fn runtime_cpu_features() -> Vec<&'static str> {
+ // This is kind of a dirty violation of abstraction, since it assumes
+ // knowledge about what specific SIMD features are being used.
+
+ let mut features = vec![];
+ if is_x86_feature_detected!("ssse3") {
+ features.push("+SIMD");
+ } else {
+ features.push("-SIMD");
+ }
+ if is_x86_feature_detected!("avx2") {
+ features.push("+AVX");
+ } else {
+ features.push("-AVX");
+ }
+ features
+}
+
+/// Returns the relevant CPU features enabled at runtime.
+#[cfg(not(all(ripgrep_runtime_cpu, target_arch = "x86_64")))]
+fn runtime_cpu_features() -> Vec<&'static str> {
+ vec![]
}
/// Arg is a light alias for a clap::Arg that is specialized to compile time
@@ -478,7 +524,7 @@ impl RGArg {
}
}
-// We add an extra space to long descriptions so that a black line is inserted
+// We add an extra space to long descriptions so that a blank line is inserted
// between flag descriptions in --help output.
macro_rules! long {
($lit:expr) => { concat!($lit, " ") }
@@ -502,6 +548,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_context_separator(&mut args);
flag_count(&mut args);
flag_count_matches(&mut args);
+ flag_crlf(&mut args);
flag_debug(&mut args);
flag_dfa_size_limit(&mut args);
flag_encoding(&mut args);
@@ -518,6 +565,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_ignore_case(&mut args);
flag_ignore_file(&mut args);
flag_invert_match(&mut args);
+ flag_json(&mut args);
flag_line_number(&mut args);
flag_line_regexp(&mut args);
flag_max_columns(&mut args);
@@ -525,6 +573,8 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_max_depth(&mut args);
flag_max_filesize(&mut args);
flag_mmap(&mut args);
+ flag_multiline(&mut args);
+ flag_multiline_dotall(&mut args);
flag_no_config(&mut args);
flag_no_ignore(&mut args);
flag_no_ignore_global(&mut args);
@@ -533,9 +583,12 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_no_ignore_vcs(&mut args);
flag_no_messages(&mut args);
flag_null(&mut args);
+ flag_null_data(&mut args);
flag_only_matching(&mut args);
flag_path_separator(&mut args);
flag_passthru(&mut args);
+ flag_pcre2(&mut args);
+ flag_pcre2_unicode(&mut args);
flag_pre(&mut args);
flag_pretty(&mut args);
flag_quiet(&mut args);
@@ -548,6 +601,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_stats(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
+ flag_trim(&mut args);
flag_type(&mut args);
flag_type_add(&mut args);
flag_type_clear(&mut args);
@@ -809,14 +863,53 @@ This overrides the --count flag. Note that when --count is combined with
args.push(arg);
}
+fn flag_crlf(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Support CRLF line terminators (useful on Windows).";
+ const LONG: &str = long!("\
+When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead
+of just '\\n'.
+
+Principally, this permits '$' in regex patterns to match just before CRLF
+instead of just before LF. The underlying regex engine may not support this
+natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This
+may produce slightly different than desired match offsets. It is intended as a
+work-around until the regex engine supports this natively.
+
+CRLF support can be disabled with --no-crlf.
+");
+ let arg = RGArg::switch("crlf")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-crlf")
+ .overrides("null-data");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-crlf")
+ .hidden()
+ .overrides("crlf");
+ args.push(arg);
+}
+
fn flag_debug(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show debug messages.";
const LONG: &str = long!("\
Show debug messages. Please use this when filing a bug report.
+
+The --debug flag is generally useful for figuring out why ripgrep skipped
+searching a particular file. The debug messages should mention all files
+skipped and why they were skipped.
+
+To get even more debug output, use the --trace flag, which implies --debug
+along with additional trace data. With --trace, the output could be quite
+large and is generally more useful for development.
");
let arg = RGArg::switch("debug")
.help(SHORT).long_help(LONG);
args.push(arg);
+
+ let arg = RGArg::switch("trace")
+ .hidden()
+ .overrides("debug");
+ args.push(arg);
}
fn flag_dfa_size_limit(args: &mut Vec<RGArg>) {
@@ -842,10 +935,17 @@ default value is 'auto', which will cause ripgrep to do a best effort automatic
detection of encoding on a per-file basis. Other supported values can be found
in the list of labels here:
https://encoding.spec.whatwg.org/#concept-encoding-get
+
+This flag can be disabled with --no-encoding.
");
let arg = RGArg::flag("encoding", "ENCODING").short("E")
.help(SHORT).long_help(LONG);
args.push(arg);
+
+ let arg = RGArg::switch("no-encoding")
+ .hidden()
+ .overrides("encoding");
+ args.push(arg);
}
fn flag_file(args: &mut Vec<RGArg>) {
@@ -1071,6 +1171,66 @@ Invert matching. Show lines that do not match the given patterns.
args.push(arg);
}
+fn flag_json(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Show search results in a JSON Lines format.";
+ const LONG: &str = long!("\
+Enable printing results in a JSON Lines format.
+
+When this flag is provided, ripgrep will emit a sequence of messages, each
+encoded as a JSON object, where there are five different message types:
+
+**begin** - A message that indicates a file is being searched and contains at
+least one match.
+
+**end** - A message that indicates a file is done being searched. This message
+also includes summary statistics about the search for a particular file.
+
+**match** - A message that indicates a match was found. This includes the text
+and offsets of the match.
+
+**context** - A message that indicates a contextual line was found. This
+includes the text of the line, along with any match information if the search
+was inverted.
+
+**summary** - The final message emitted by ripgrep that contains summary
+statistics about the search across all files.
+
+Since file paths or the contents of files are not guaranteed to be valid UTF-8
+and JSON itself must be representable by a Unicode encoding, ripgrep will emit
+all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is
+a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64
+encoded contents of the data.
+
+The JSON Lines format is only supported for showing search results. It cannot
+be used with other flags that emit other types of output, such as --files,
+--files-with-matches, --files-without-match, --count or --count-matches.
+ripgrep will report an error if any of the aforementioned flags are used in
+concert with --json.
+
+Other flags that control aspects of the standard output such as
+--only-matching, --heading, --replace, --max-columns, etc., have no effect
+when --json is set.
+
+A more complete description of the JSON format used can be found here:
+https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html
+
+The JSON Lines format can be disabled with --no-json.
+");
+ let arg = RGArg::switch("json")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-json")
+ .conflicts(&[
+ "count", "count-matches",
+ "files", "files-with-matches", "files-without-match",
+ ]);
+ args.push(arg);
+
+ let arg = RGArg::switch("no-json")
+ .hidden()
+ .overrides("json");
+ args.push(arg);
+}
+
fn flag_line_number(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show line numbers.";
const LONG: &str = long!("\
@@ -1198,6 +1358,79 @@ This flag overrides --mmap.
args.push(arg);
}
+fn flag_multiline(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Enable matching across multiple lines.";
+ const LONG: &str = long!("\
+Enable matching across multiple lines.
+
+When multiline mode is enabled, ripgrep will lift the restriction that a match
+cannot include a line terminator. For example, when multiline mode is not
+enabled (the default), then the regex '\\p{any}' will match any Unicode
+codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden,
+and if you try to use it, ripgrep will return an error. However, when multiline
+mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n',
+and regexes like '\\n' are permitted.
+
+An important caveat is that multiline mode does not change the match semantics
+of '.'. Namely, in most regex matchers, a '.' will by default match any
+character other than '\\n', and this is true in ripgrep as well. In order to
+make '.' match '\\n', you must enable the \"dot all\" flag inside the regex.
+For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will
+match any character, including '\\n'. Alternatively, the '--multiline-dotall'
+flag may be passed to make the \"dot all\" behavior the default. This flag only
+applies when multiline search is enabled.
+
+There is no limit on the number of lines that a single match can span.
+
+**WARNING**: Because of how the underlying regex engine works, multiline
+searches may be slower than normal line-oriented searches, and they may also
+use more memory. In particular, when multiline mode is enabled, ripgrep
+requires that each file it searches is laid out contiguously in memory
+(either by reading it onto the heap or by memory-mapping it). Things that
+cannot be memory-mapped (such as stdin) will be consumed until EOF before
+searching can begin. In general, ripgrep will only do these things when
+necessary. Specifically, if the --multiline flag is provided but the regex
+does not contain patterns that would match '\\n' characters, then ripgrep
+will automatically avoid reading each file into memory before searching it.
+Nevertheless, if you only care about matches spanning at most one line, then it
+is always better to disable multiline mode.
+
+This flag can be disabled with --no-multiline.
+");
+ let arg = RGArg::switch("multiline").short("U")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-multiline");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-multiline")
+ .hidden()
+ .overrides("multiline");
+ args.push(arg);
+}
+
+fn flag_multiline_dotall(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Make '.' match new lines when multiline is enabled.";
+ const LONG: &str = long!("\
+This flag enables \"dot all\" in your regex pattern, which causes '.' to match
+newlines when multiline searching is enabled. This flag has no effect if
+multiline searching isn't enabled with the --multiline flag.
+
+Normally, a '.' will match any character except newlines. While this behavior
+typically isn't relevant for line-oriented matching (since matches can span at
+most one line), this can be useful when searching with the -U/--multiline flag.
+By default, the multiline mode runs without this flag.
+
+This flag is generally intended to be used in an alias or your ripgrep config
+file if you prefer \"dot all\" semantics by default. Note that regardless of
+whether this flag is used, \"dot all\" semantics can still be controlled via
+inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot
+all\" where as '(?-s:.)' always disables \"dot all\".
+");
+ let arg = RGArg::switch("multiline-dotall")
+ .help(SHORT).long_help(LONG);
+ args.push(arg);
+}
+
fn flag_no_config(args: &mut Vec<RGArg>) {
const SHORT: &str = "Never read configuration files.";
const LONG: &str = long!("\
@@ -1340,6 +1573,29 @@ for use with xargs.
args.push(arg);
}
+fn flag_null_data(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Use NUL as a line terminator instead of \\n.";
+ const LONG: &str = long!("\
+Enabling this option causes ripgrep to use NUL as a line terminator instead of
+the default of '\\n'.
+
+This is useful when searching large binary files that would otherwise have very
+long lines if '\\n' were used as the line terminator. In particular, ripgrep
+requires that, at a minimum, each line must fit into memory. Using NUL instead
+can be a useful stopgap to keep memory requirements low and avoid OOM (out of
+memory) conditions.
+
+This is also useful for processing NUL delimited data, such as that emitted
+when using ripgrep's -0/--null flag or find's -print0 flag.
+
+Using this flag implies -a/--text.
+");
+ let arg = RGArg::switch("null-data")
+ .help(SHORT).long_help(LONG)
+ .overrides("crlf");
+ args.push(arg);
+}
+
fn flag_only_matching(args: &mut Vec<RGArg>) {
const SHORT: &str = "Print only matches parts of a line.";
const LONG: &str = long!("\
@@ -1374,13 +1630,76 @@ the empty string. For example, if you are searching using 'rg foo' then using
'rg \"^|foo\"' instead will emit every line in every file searched, but only
occurrences of 'foo' will be highlighted. This flag enables the same behavior
without needing to modify the pattern.
-
-This flag conflicts with the --only-matching and --replace flags.
");
let arg = RGArg::switch("passthru")
.help(SHORT).long_help(LONG)
- .alias("passthrough")
- .conflicts(&["only-matching", "replace"]);
+ .alias("passthrough");
+ args.push(arg);
+}
+
+fn flag_pcre2(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Enable PCRE2 matching.";
+ const LONG: &str = long!("\
+When this flag is present, ripgrep will use the PCRE2 regex engine instead of
+its default regex engine.
+
+This is generally useful when you want to use features such as look-around
+or backreferences.
+
+Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
+your build of ripgrep, then using this flag will result in ripgrep printing
+an error message and exiting.
+
+This flag can be disabled with --no-pcre2.
+");
+ let arg = RGArg::switch("pcre2").short("P")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-pcre2");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-pcre2")
+ .hidden()
+ .overrides("pcre2");
+ args.push(arg);
+}
+
+fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
+ const LONG: &str = long!("\
+When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
+matching is not enabled, then this flag has no effect.
+
+This flag is enabled by default when PCRE2 matching is enabled.
+
+When PCRE2's Unicode mode is enabled several different types of patterns become
+Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
+and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
+instead of any byte. Caseless matching will also use Unicode simple case
+folding instead of ASCII-only case insensitivity.
+
+Unicode mode in PCRE2 represents a critical trade off in the user experience
+of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
+support the ability to search possibly invalid UTF-8 with Unicode features
+enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
+mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
+of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
+mode enabled and you attempt to search invalid UTF-8, then the search for that
+file will halt and print an error. For this reason, when PCRE2's Unicode mode
+is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
+replacing them with the Unicode replacement codepoint.
+
+If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
+is enabled, then pass the --no-encoding flag to disable all transcoding.
+
+This flag can be disabled with --no-pcre2-unicode.
+");
+ let arg = RGArg::switch("pcre2-unicode")
+ .help(SHORT).long_help(LONG);
+ args.push(arg);
+
+ let arg = RGArg::switch("no-pcre2-unicode")
+ .hidden()
+ .overrides("pcre2-unicode");
args.push(arg);
}
@@ -1592,11 +1911,18 @@ searched, and the time taken for the entire search to complete.
This set of aggregate statistics may expand over time.
Note that this flag has no effect if --files, --files-with-matches or
---files-without-match is passed.");
+--files-without-match is passed.
+This flag can be disabled with --no-stats.
+");
let arg = RGArg::switch("stats")
- .help(SHORT).long_help(LONG);
+ .help(SHORT).long_help(LONG)
+ .overrides("no-stats");
+ args.push(arg);
+ let arg = RGArg::switch("no-stats")
+ .hidden()
+ .overrides("stats");
args.push(arg);
}
@@ -1639,6 +1965,25 @@ causes ripgrep to choose the thread count using heuristics.
args.push(arg);
}
+fn flag_trim(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Trim prefixed whitespace from matches.";
+ const LONG: &str = long!("\
+When set, all ASCII whitespace at the beginning of each line printed will be
+trimmed.
+
+This flag can be disabled with --no-trim.
+");
+ let arg = RGArg::switch("trim")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-trim");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-trim")
+ .hidden()
+ .overrides("trim");
+ args.push(arg);
+}
+
fn flag_type(args: &mut Vec<RGArg>) {
const SHORT: &str = "Only search files matching TYPE.";
const LONG: &str = long!("\
diff --git a/src/args.rs b/src/args.rs
index 10b9e557..20e67b67 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,89 +1,117 @@
use std::cmp;
use std::env;
use std::ffi::OsStr;
-use std::fs;
+use std::fs::File;
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};
use std::sync::Arc;
-use std::sync::atomic::{AtomicBool, Ordering};
+use atty;
use clap;
-use encoding_rs::Encoding;
-use grep::{Grep, GrepBuilder};
+use grep::matcher::LineTerminator;
+#[cfg(feature = "pcre2")]
+use grep::pcre2::{
+ RegexMatcher as PCRE2RegexMatcher,
+ RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
+};
+use grep::printer::{
+ ColorSpecs, Stats,
+ JSON, JSONBuilder,
+ Standard, StandardBuilder,
+ Summary, SummaryBuilder, SummaryKind,
+};
+use grep::regex::{
+ RegexMatcher as RustRegexMatcher,
+ RegexMatcherBuilder as RustRegexMatcherBuilder,
+};
+use grep::searcher::{
+ BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder,
+};
+use ignore::overrides::{Override, OverrideBuilder};
+use ignore::types::{FileTypeDef, Types, TypesBuilder};
+use ignore::{Walk, WalkBuilder, WalkParallel};
use log;
use num_cpus;
-use regex;
-use same_file;
-use termcolor;
+use path_printer::{PathPrinter, PathPrinterBuilder};
+use regex::{self, Regex};
+use same_file::Handle;
+use termcolor::{
+ WriteColor,
+ BufferedStandardStream, BufferWriter, ColorChoice, StandardStream,
+};
use app;
-use atty;
-use ignore::overrides::{Override, OverrideBuilder};
-use ignore::types::{FileTypeDef, Types, TypesBuilder};
-use ignore;
-use printer::{ColorSpecs, Printer};
-use unescape::{escape, unescape};
-use worker::{Worker, WorkerBuilder};
-
use config;
use logger::Logger;
+use messages::{set_messages, set_ignore_messages};
+use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder};
+use subject::SubjectBuilder;
+use unescape::{escape, unescape};
use Result;
-/// `Args` are transformed/normalized from `ArgMatches`.
-#[derive(Debug)]
-pub struct Args {
+/// The command that ripgrep should execute based on the command line
+/// configuration.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Command {
+ /// Search using exactly one thread.
+ Search,
+ /// Search using possibly many threads.
+ SearchParallel,
+ /// The command line parameters suggest that a search should occur, but
+ /// ripgrep knows that a match can never be found (e.g., no given patterns
+ /// or --max-count=0).
+ SearchNever,
+ /// Show the files that would be searched, but don't actually search them,
+ /// and use exactly one thread.
+ Files,
+ /// Show the files that would be searched, but don't actually search them,
+ /// and perform directory traversal using possibly many threads.
+ FilesParallel,
+ /// List all file type definitions configured, including the default file
+ /// types and any additional file types added to the command line.
+ Types,
+}
+
+impl Command {
+ /// Returns true if and only if this command requires executing a search.
+ fn is_search(&self) -> bool {
+ use self::Command::*;
+
+ match *self {
+ Search | SearchParallel => true,
+ SearchNever | Files | FilesParallel | Types => false,
+ }
+ }
+}
+
+/// The primary configuration object used throughout ripgrep. It provides a
+/// high-level convenient interface to the provided command line arguments.
+///
+/// An `Args` object is cheap to clone and can be used from multiple threads
+/// simultaneously.
+#[derive(Clone, Debug)]
+pub struct Args(Arc<ArgsImp>);
+
+#[derive(Clone, Debug)]
+struct ArgsImp {
+ /// Mid-to-low level routines for extracting CLI arguments.
+ matches: ArgMatches,
+ /// The patterns provided at the command line and/or via the -f/--file
+ /// flag. This may be empty.
+ patterns: Vec<String>,
+ /// A matcher built from the patterns.
+ ///
+ /// It's important that this is only built once, since building this goes
+ /// through regex compilation and various types of analyses. That is, if
+ /// you need many of these (one per thread, for example), it is better to
+ /// build it once and then clone it.
+ matcher: PatternMatcher,
+ /// The paths provided at the command line. This is guaranteed to be
+ /// non-empty. (If no paths are provided, then a default path is created.)
paths: Vec<PathBuf>,
- after_context: usize,
- before_context: usize,
- byte_offset: bool,
- can_match: bool,
- color_choice: termcolor::ColorChoice,
- colors: ColorSpecs,
- column: bool,
- context_separator: Vec<u8>,
- count: bool,
- count_matches: bool,
- encoding: Option<&'static Encoding>,
- files_with_matches: bool,
- files_without_matches: bool,
- eol: u8,
- files: bool,
- follow: bool,
- glob_overrides: Override,
- grep: Grep,
- heading: bool,
- hidden: bool,
- ignore_files: Vec<PathBuf>,
- invert_match: bool,
- line_number: bool,
- line_per_match: bool,
- max_columns: Option<usize>,
- max_count: Option<u64>,
- max_depth: Option<usize>,
- max_filesize: Option<u64>,
- mmap: bool,
- no_ignore: bool,
- no_ignore_global: bool,
- no_ignore_messages: bool,
- no_ignore_parent: bool,
- no_ignore_vcs: bool,
- no_messages: bool,
- null: bool,
- only_matching: bool,
- path_separator: Option<u8>,
- quiet: bool,
- quiet_matched: QuietMatched,
- replace: Option<Vec<u8>>,
- sort_files: bool,
- stdout_handle: Option<same_file::Handle>,
- text: bool,
- threads: usize,
- type_list: bool,
- types: Types,
- with_filename: bool,
- search_zip_files: bool,
- preprocessor: Option<PathBuf>,
- stats: bool
+ /// True if and only if `paths` had to be populated with a single
+ /// default path.
+ using_default_path: bool,
}
impl Args {
@@ -100,46 +128,262 @@ impl Args {
// trying to parse config files. If a config file exists and has
// arguments, then we re-parse argv, otherwise we just use the matches
// we have here.
- let early_matches = ArgMatches(app::app().get_matches());
+ let early_matches = ArgMatches::new(app::app().get_matches());
+ set_messages(!early_matches.is_present("no-messages"));
+ set_ignore_messages(!early_matches.is_present("no-ignore-messages"));
if let Err(err) = Logger::init() {
- errored!("failed to initialize logger: {}", err);
+ return Err(format!("failed to initialize logger: {}", err).into());
}
- if early_matches.is_present("debug") {
+ if early_matches.is_present("trace") {
+ log::set_max_level(log::LevelFilter::Trace);
+ } else if early_matches.is_present("debug") {
log::set_max_level(log::LevelFilter::Debug);
} else {
log::set_max_level(log::LevelFilter::Warn);
}
- let matches = Args::matches(early_matches);
+ let matches = early_matches.reconfigure();
// The logging level may have changed if we brought in additional
// arguments from a configuration file, so recheck it and set the log
// level as appropriate.
- if matches.is_present("debug") {
+ if matches.is_present("trace") {
+ log::set_max_level(log::LevelFilter::Trace);
+ } else if matches.is_present("debug") {
log::set_max_level(log::LevelFilter::Debug);
} else {
log::set_max_level(log::LevelFilter::Warn);
}
+ set_messages(!matches.is_present("no-messages"));
+ set_ignore_messages(!matches.is_present("no-ignore-messages"));
matches.to_args()
}
- /// Run clap and return the matches. If clap determines a problem with the
- /// user provided arguments (or if --help or --version are given), then an
- /// error/usage/version will be printed and the process will exit.
+ /// Return direct access to command line arguments.
+ fn matches(&self) -> &ArgMatches {
+ &self.0.matches
+ }
+
+ /// Return the patterns found in the command line arguments. This includes
+ /// patterns read via the -f/--file flags.
+ fn patterns(&self) -> &[String] {
+ &self.0.patterns
+ }
+
+ /// Return the matcher built from the patterns.
+ fn matcher(&self) -> &PatternMatcher {
+ &self.0.matcher
+ }
+
+ /// Return the paths found in the command line arguments. This is
+ /// guaranteed to be non-empty. In the case where no explicit arguments are
+ /// provided, a single default path is provided automatically.
+ fn paths(&self) -> &[PathBuf] {
+ &self.0.paths
+ }
+
+ /// Returns true if and only if `paths` had to be populated with a default
+ /// path, which occurs only when no paths were given as command line
+ /// arguments.
+ fn using_default_path(&self) -> bool {
+ self.0.using_default_path
+ }
+
+ /// Return the printer that should be used for formatting the output of
+ /// search results.
+ ///
+ /// The returned printer will write results to the given writer.
+ fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
+ match self.matches().output_kind() {
+ OutputKind::Standard => {
+ let separator_search = self.command()? == Command::Search;
+ self.matches()
+ .printer_standard(self.paths(), wtr, separator_search)
+ .map(Printer::Standard)
+ }
+ OutputKind::Summary => {
+ self.matches()
+ .printer_summary(self.paths(), wtr)
+ .map(Printer::Summary)
+ }
+ OutputKind::JSON => {
+ self.matches()
+ .printer_json(wtr)
+ .map(Printer::JSON)
+ }
+ }
+ }
+}
+
+/// High level public routines for building data structures used by ripgrep
+/// from command line arguments.
+impl Args {
+ /// Create a new buffer writer for multi-threaded printing with color
+ /// support.
+ pub fn buffer_writer(&self) -> Result<BufferWriter> {
+ let mut wtr = BufferWriter::stdout(self.matches().color_choice());
+ wtr.separator(self.matches().file_separator()?);
+ Ok(wtr)
+ }
+
+ /// Return the high-level command that ripgrep should run.
+ pub fn command(&self) -> Result<Command> {
+ let is_one_search = self.matches().is_one_search(self.paths());
+ let threads = self.matches().threads()?;
+ let one_thread = is_one_search || threads == 1;
+
+ Ok(if self.matches().is_present("type-list") {
+ Command::Types
+ } else if self.matches().is_present("files") {
+ if one_thread {
+ Command::Files
+ } else {
+ Command::FilesParallel
+ }
+ } else if self.matches().can_never_match(self.patterns()) {
+ Command::SearchNever
+ } else if one_thread {
+ Command::Search
+ } else {
+ Command::SearchParallel
+ })
+ }
+
+ /// Build a path printer that can be used for printing just file paths,
+ /// with optional color support.
+ ///
+ /// The printer will print paths to the given writer.
+ pub fn path_printer<W: WriteColor>(
+ &self,
+ wtr: W,
+ ) -> Result<PathPrinter<W>> {
+ let mut builder = PathPrinterBuilder::new();
+ builder
+ .color_specs(self.matches().color_specs()?)
+ .separator(self.matches().path_separator()?)
+ .terminator(self.matches().path_terminator().unwrap_or(b'\n'));
+ Ok(builder.build(wtr))
+ }
+
+ /// Returns true if and only if the search should quit after finding the
+ /// first match.
+ pub fn quit_after_match(&self) -> Result<bool> {
+ Ok(self.matches().is_present("quiet") && self.stats()?.is_none())
+ }
+
+ /// Build a worker for executing searches.
+ ///
+ /// Search results are written to the given writer.
+ pub fn search_worker<W: WriteColor>(
+ &self,
+ wtr: W,
+ ) -> Result<SearchWorker<W>> {
+ let matcher = self.matcher().clone();
+ let printer = self.printer(wtr)?;
+ let searcher = self.matches().searcher(self.paths())?;
+ let mut builder = SearchWorkerBuilder::new();
+ builder
+ .json_stats(self.matches().is_present("json"))
+ .preprocessor(self.matches().preprocessor