15 files changed, 2568 insertions, 4558 deletions
diff --git a/src/app.rs b/src/app.rs
index 24851c3b..a0b036d5 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -2,8 +2,8 @@
 // including some light validation.
 //
 // This module is purposely written in a bare-bones way, since it is included
-// in ripgrep's build.rs file as a way to generate completion files for common
-// shells.
+// in ripgrep's build.rs file as a way to generate a man page and completion
+// files for common shells.
 //
 // The only other place that ripgrep deals with clap is in src/args.rs, which
 // is where we read clap's configuration from the end user's arguments and turn
@@ -82,7 +82,34 @@ pub fn app() -> App<'static, 'static> {
 /// the RIPGREP_BUILD_GIT_HASH env var is inspect for it. If that isn't set,
 /// then a revision hash is not included in the version string returned.
 pub fn long_version(revision_hash: Option<&str>) -> String {
-    // Let's say whether faster CPU instructions are enabled or not.
+    // Do we have a git hash?
+    // (Yes, if ripgrep was built on a machine with `git` installed.)
+    let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) {
+        None => String::new(),
+        Some(githash) => format!(" (rev {})", githash),
+    };
+    // Put everything together.
+    let runtime = runtime_cpu_features();
+    if runtime.is_empty() {
+        format!(
+            "{}{}\n{} (compiled)",
+            crate_version!(),
+            hash,
+            compile_cpu_features().join(" ")
+        )
+    } else {
+        format!(
+            "{}{}\n{} (compiled)\n{} (runtime)",
+            crate_version!(),
+            hash,
+            compile_cpu_features().join(" "),
+            runtime.join(" ")
+        )
+    }
+}
+
+/// Returns the relevant CPU features enabled at compile time.
+fn compile_cpu_features() -> Vec<&'static str> {
     let mut features = vec![];
     if cfg!(feature = "simd-accel") {
         features.push("+SIMD");
@@ -94,14 +121,33 @@ pub fn long_version(revision_hash: Option<&str>) -> String {
     } else {
         features.push("-AVX");
     }
-    // Do we have a git hash?
-    // (Yes, if ripgrep was built on a machine with `git` installed.)
-    let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) {
-        None => String::new(),
-        Some(githash) => format!(" (rev {})", githash),
-    };
-    // Put everything together.
-    format!("{}{}\n{}", crate_version!(), hash, features.join(" "))
+    features
+}
+
+/// Returns the relevant CPU features enabled at runtime.
+#[cfg(all(ripgrep_runtime_cpu, target_arch = "x86_64"))]
+fn runtime_cpu_features() -> Vec<&'static str> {
+    // This is kind of a dirty violation of abstraction, since it assumes
+    // knowledge about what specific SIMD features are being used.
+
+    let mut features = vec![];
+    if is_x86_feature_detected!("ssse3") {
+        features.push("+SIMD");
+    } else {
+        features.push("-SIMD");
+    }
+    if is_x86_feature_detected!("avx2") {
+        features.push("+AVX");
+    } else {
+        features.push("-AVX");
+    }
+    features
+}
+
+/// Returns the relevant CPU features enabled at runtime.
+#[cfg(not(all(ripgrep_runtime_cpu, target_arch = "x86_64")))]
+fn runtime_cpu_features() -> Vec<&'static str> {
+    vec![]
 }
 
 /// Arg is a light alias for a clap::Arg that is specialized to compile time
@@ -478,7 +524,7 @@ impl RGArg {
     }
 }
 
-// We add an extra space to long descriptions so that a black line is inserted
+// We add an extra space to long descriptions so that a blank line is inserted
 // between flag descriptions in --help output.
 macro_rules! long {
     ($lit:expr) => { concat!($lit, " ") }
@@ -502,6 +548,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_context_separator(&mut args);
     flag_count(&mut args);
     flag_count_matches(&mut args);
+    flag_crlf(&mut args);
     flag_debug(&mut args);
     flag_dfa_size_limit(&mut args);
     flag_encoding(&mut args);
@@ -518,6 +565,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_ignore_case(&mut args);
     flag_ignore_file(&mut args);
     flag_invert_match(&mut args);
+    flag_json(&mut args);
     flag_line_number(&mut args);
     flag_line_regexp(&mut args);
     flag_max_columns(&mut args);
@@ -525,6 +573,8 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_max_depth(&mut args);
     flag_max_filesize(&mut args);
     flag_mmap(&mut args);
+    flag_multiline(&mut args);
+    flag_multiline_dotall(&mut args);
     flag_no_config(&mut args);
     flag_no_ignore(&mut args);
     flag_no_ignore_global(&mut args);
@@ -533,9 +583,12 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_no_ignore_vcs(&mut args);
     flag_no_messages(&mut args);
     flag_null(&mut args);
+    flag_null_data(&mut args);
     flag_only_matching(&mut args);
     flag_path_separator(&mut args);
     flag_passthru(&mut args);
+    flag_pcre2(&mut args);
+    flag_pcre2_unicode(&mut args);
     flag_pre(&mut args);
     flag_pretty(&mut args);
     flag_quiet(&mut args);
@@ -548,6 +601,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_stats(&mut args);
     flag_text(&mut args);
     flag_threads(&mut args);
+    flag_trim(&mut args);
     flag_type(&mut args);
     flag_type_add(&mut args);
     flag_type_clear(&mut args);
@@ -809,14 +863,53 @@ This overrides the --count flag. Note that when --count is combined with
     args.push(arg);
 }
 
+fn flag_crlf(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Support CRLF line terminators (useful on Windows).";
+    const LONG: &str = long!("\
+When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead
+of just '\\n'.
+
+Principally, this permits '$' in regex patterns to match just before CRLF
+instead of just before LF. The underlying regex engine may not support this
+natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This
+may produce slightly different than desired match offsets. It is intended as a
+work-around until the regex engine supports this natively.
+
+CRLF support can be disabled with --no-crlf.
+");
+    let arg = RGArg::switch("crlf")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-crlf")
+        .overrides("null-data");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-crlf")
+        .hidden()
+        .overrides("crlf");
+    args.push(arg);
+}
+
 fn flag_debug(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Show debug messages.";
     const LONG: &str = long!("\
 Show debug messages. Please use this when filing a bug report.
+
+The --debug flag is generally useful for figuring out why ripgrep skipped
+searching a particular file. The debug messages should mention all files
+skipped and why they were skipped.
+
+To get even more debug output, use the --trace flag, which implies --debug
+along with additional trace data. With --trace, the output could be quite
+large and is generally more useful for development.
 ");
     let arg = RGArg::switch("debug")
         .help(SHORT).long_help(LONG);
     args.push(arg);
+
+    let arg = RGArg::switch("trace")
+        .hidden()
+        .overrides("debug");
+    args.push(arg);
 }
 
 fn flag_dfa_size_limit(args: &mut Vec<RGArg>) {
@@ -842,10 +935,17 @@ default value is 'auto', which will cause ripgrep to do a best effort automatic
 detection of encoding on a per-file basis. Other supported values can be found
 in the list of labels here:
 https://encoding.spec.whatwg.org/#concept-encoding-get
+
+This flag can be disabled with --no-encoding.
 ");
     let arg = RGArg::flag("encoding", "ENCODING").short("E")
         .help(SHORT).long_help(LONG);
     args.push(arg);
+
+    let arg = RGArg::switch("no-encoding")
+        .hidden()
+        .overrides("encoding");
+    args.push(arg);
 }
 
 fn flag_file(args: &mut Vec<RGArg>) {
@@ -1071,6 +1171,66 @@ Invert matching. Show lines that do not match the given patterns.
     args.push(arg);
 }
 
+fn flag_json(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Show search results in a JSON Lines format.";
+    const LONG: &str = long!("\
+Enable printing results in a JSON Lines format.
+
+When this flag is provided, ripgrep will emit a sequence of messages, each
+encoded as a JSON object, where there are five different message types:
+
+**begin** - A message that indicates a file is being searched and contains at
+least one match.
+
+**end** - A message that indicates a file is done being searched. This message
+also includes summary statistics about the search for a particular file.
+
+**match** - A message that indicates a match was found. This includes the text
+and offsets of the match.
+
+**context** - A message that indicates a contextual line was found. This
+includes the text of the line, along with any match information if the search
+was inverted.
+
+**summary** - The final message emitted by ripgrep that contains summary
+statistics about the search across all files.
+
+Since file paths or the contents of files are not guaranteed to be valid UTF-8
+and JSON itself must be representable by a Unicode encoding, ripgrep will emit
+all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is
+a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64
+encoded contents of the data.
+
+The JSON Lines format is only supported for showing search results. It cannot
+be used with other flags that emit other types of output, such as --files,
+--files-with-matches, --files-without-match, --count or --count-matches.
+ripgrep will report an error if any of the aforementioned flags are used in
+concert with --json.
+
+Other flags that control aspects of the standard output such as
+--only-matching, --heading, --replace, --max-columns, etc., have no effect
+when --json is set.
+
+A more complete description of the JSON format used can be found here:
+https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html
+
+The JSON Lines format can be disabled with --no-json.
+");
+    let arg = RGArg::switch("json")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-json")
+        .conflicts(&[
+            "count", "count-matches",
+            "files", "files-with-matches", "files-without-match",
+        ]);
+    args.push(arg);
+
+    let arg = RGArg::switch("no-json")
+        .hidden()
+        .overrides("json");
+    args.push(arg);
+}
+
 fn flag_line_number(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Show line numbers.";
     const LONG: &str = long!("\
@@ -1198,6 +1358,79 @@ This flag overrides --mmap.
     args.push(arg);
 }
 
+fn flag_multiline(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Enable matching across multiple lines.";
+    const LONG: &str = long!("\
+Enable matching across multiple lines.
+
+When multiline mode is enabled, ripgrep will lift the restriction that a match
+cannot include a line terminator. For example, when multiline mode is not
+enabled (the default), then the regex '\\p{any}' will match any Unicode
+codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden,
+and if you try to use it, ripgrep will return an error. However, when multiline
+mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n',
+and regexes like '\\n' are permitted.
+
+An important caveat is that multiline mode does not change the match semantics
+of '.'. Namely, in most regex matchers, a '.' will by default match any
+character other than '\\n', and this is true in ripgrep as well. In order to
+make '.' match '\\n', you must enable the \"dot all\" flag inside the regex.
+For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will
+match any character, including '\\n'. Alternatively, the '--multiline-dotall'
+flag may be passed to make the \"dot all\" behavior the default. This flag only
+applies when multiline search is enabled.
+
+There is no limit on the number of the lines that a single match can span.
+
+**WARNING**: Because of how the underlying regex engine works, multiline
+searches may be slower than normal line-oriented searches, and they may also
+use more memory. In particular, when multiline mode is enabled, ripgrep
+requires that each file it searches is laid out contiguously in memory
+(either by reading it onto the heap or by memory-mapping it). Things that
+cannot be memory-mapped (such as stdin) will be consumed until EOF before
+searching can begin. In general, ripgrep will only do these things when
+necessary. Specifically, if the --multiline flag is provided but the regex
+does not contain patterns that would match '\\n' characters, then ripgrep
+will automatically avoid reading each file into memory before searching it.
+Nevertheless, if you only care about matches spanning at most one line, then it
+is always better to disable multiline mode.
+
+This flag can be disabled with --no-multiline.
+");
+    let arg = RGArg::switch("multiline").short("U")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-multiline");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-multiline")
+        .hidden()
+        .overrides("multiline");
+    args.push(arg);
+}
+
+fn flag_multiline_dotall(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Make '.' match new lines when multiline is enabled.";
+    const LONG: &str = long!("\
+This flag enables \"dot all\" in your regex pattern, which causes '.' to match
+newlines when multiline searching is enabled. This flag has no effect if
+multiline searching isn't enabled with the --multiline flag.
+
+Normally, a '.' will match any character except newlines. While this behavior
+typically isn't relevant for line-oriented matching (since matches can span at
+most one line), this can be useful when searching with the -U/--multiline flag.
+By default, the multiline mode runs without this flag.
+
+This flag is generally intended to be used in an alias or your ripgrep config
+file if you prefer \"dot all\" semantics by default. Note that regardless of
+whether this flag is used, \"dot all\" semantics can still be controlled via
+inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot
+all\" whereas '(?-s:.)' always disables \"dot all\".
+");
+    let arg = RGArg::switch("multiline-dotall")
+        .help(SHORT).long_help(LONG);
+    args.push(arg);
+}
+
 fn flag_no_config(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Never read configuration files.";
     const LONG: &str = long!("\
@@ -1340,6 +1573,29 @@ for use with xargs.
     args.push(arg);
 }
 
+fn flag_null_data(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Use NUL as a line terminator instead of \\n.";
+    const LONG: &str = long!("\
+Enabling this option causes ripgrep to use NUL as a line terminator instead of
+the default of '\\n'.
+
+This is useful when searching large binary files that would otherwise have very
+long lines if '\\n' were used as the line terminator. In particular, ripgrep
+requires that, at a minimum, each line must fit into memory. Using NUL instead
+can be a useful stopgap to keep memory requirements low and avoid OOM (out of
+memory) conditions.
+
+This is also useful for processing NUL delimited data, such as that emitted
+when using ripgrep's -0/--null flag or find's -print0 flag.
+
+Using this flag implies -a/--text.
+");
+    let arg = RGArg::switch("null-data")
+        .help(SHORT).long_help(LONG)
+        .overrides("crlf");
+    args.push(arg);
+}
+
 fn flag_only_matching(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Print only matches parts of a line.";
     const LONG: &str = long!("\
@@ -1374,13 +1630,76 @@ the empty string. For example, if you are searching using 'rg foo' then using
 'rg \"^|foo\"' instead will emit every line in every file searched, but only
 occurrences of 'foo' will be highlighted. This flag enables the same behavior
 without needing to modify the pattern.
-
-This flag conflicts with the --only-matching and --replace flags.
 ");
     let arg = RGArg::switch("passthru")
         .help(SHORT).long_help(LONG)
-        .alias("passthrough")
-        .conflicts(&["only-matching", "replace"]);
+        .alias("passthrough");
+    args.push(arg);
+}
+
+fn flag_pcre2(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Enable PCRE2 matching.";
+    const LONG: &str = long!("\
+When this flag is present, ripgrep will use the PCRE2 regex engine instead of
+its default regex engine.
+
+This is generally useful when you want to use features such as look-around
+or backreferences.
+
+Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
+your build of ripgrep, then using this flag will result in ripgrep printing
+an error message and exiting.
+
+This flag can be disabled with --no-pcre2.
+");
+    let arg = RGArg::switch("pcre2").short("P")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-pcre2");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-pcre2")
+        .hidden()
+        .overrides("pcre2");
+    args.push(arg);
+}
+
+fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
+    const LONG: &str = long!("\
+When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
+matching is not enabled, then this flag has no effect.
+
+This flag is enabled by default when PCRE2 matching is enabled.
+
+When PCRE2's Unicode mode is enabled several different types of patterns become
+Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
+and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
+instead of any byte. Caseless matching will also use Unicode simple case
+folding instead of ASCII-only case insensitivity.
+
+Unicode mode in PCRE2 represents a critical trade off in the user experience
+of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
+support the ability to search possibly invalid UTF-8 with Unicode features
+enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
+mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
+of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
+mode enabled and you attempt to search invalid UTF-8, then the search for that
+file will halt and print an error. For this reason, when PCRE2's Unicode mode
+is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
+replacing them with the Unicode replacement codepoint.
+
+If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
+is enabled, then pass the --no-encoding flag to disable all transcoding.
+
+This flag can be disabled with --no-pcre2-unicode.
+");
+    // Declare the override in both directions, like the other flag pairs.
+    let arg = RGArg::switch("pcre2-unicode").help(SHORT).long_help(LONG)
+        .overrides("no-pcre2-unicode");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-pcre2-unicode").hidden()
+        .overrides("pcre2-unicode");
     args.push(arg);
 }
 
@@ -1592,11 +1911,18 @@ searched, and the time taken for the entire search to complete.
 This set of aggregate statistics may expand over time.
 
 Note that this flag has no effect if --files, --files-with-matches or
---files-without-match is passed.");
+--files-without-match is passed.
 
+This flag can be disabled with --no-stats.
+");
     let arg = RGArg::switch("stats")
-        .help(SHORT).long_help(LONG);
+        .help(SHORT).long_help(LONG)
+        .overrides("no-stats");
+    args.push(arg);
 
+    let arg = RGArg::switch("no-stats")
+        .hidden()
+        .overrides("stats");
     args.push(arg);
 }
 
@@ -1639,6 +1965,25 @@ causes ripgrep to choose the thread count using heuristics.
     args.push(arg);
 }
 
+fn flag_trim(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Trim prefixed whitespace from matches.";
+    const LONG: &str = long!("\
+When set, all ASCII whitespace at the beginning of each line printed will be
+trimmed.
+
+This flag can be disabled with --no-trim.
+");
+    let arg = RGArg::switch("trim")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-trim");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-trim")
+        .hidden()
+        .overrides("trim");
+    args.push(arg);
+}
+
 fn flag_type(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Only search files matching TYPE.";
     const LONG: &str = long!("\
diff --git a/src/args.rs b/src/args.rs
index 10b9e557..20e67b67 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -1,89 +1,117 @@
 use std::cmp;
 use std::env;
 use std::ffi::OsStr;
-use std::fs;
+use std::fs::File;
 use std::io::{self, BufRead};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
-use std::sync::atomic::{AtomicBool, Ordering};
 
+use atty;
 use clap;
-use encoding_rs::Encoding;
-use grep::{Grep, GrepBuilder};
+use grep::matcher::LineTerminator;
+#[cfg(feature = "pcre2")]
+use grep::pcre2::{
+    RegexMatcher as PCRE2RegexMatcher,
+    RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
+};
+use grep::printer::{
+    ColorSpecs, Stats,
+    JSON, JSONBuilder,
+    Standard, StandardBuilder,
+    Summary, SummaryBuilder, SummaryKind,
+};
+use grep::regex::{
+    RegexMatcher as RustRegexMatcher,
+    RegexMatcherBuilder as RustRegexMatcherBuilder,
+};
+use grep::searcher::{
+    BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder,
+};
+use ignore::overrides::{Override, OverrideBuilder};
+use ignore::types::{FileTypeDef, Types, TypesBuilder};
+use ignore::{Walk, WalkBuilder, WalkParallel};
 use log;
 use num_cpus;
-use regex;
-use same_file;
-use termcolor;
+use path_printer::{PathPrinter, PathPrinterBuilder};
+use regex::{self, Regex};
+use same_file::Handle;
+use termcolor::{
+    WriteColor,
+    BufferedStandardStream, BufferWriter, ColorChoice, StandardStream,
+};
 
 use app;
-use atty;
-use ignore::overrides::{Override, OverrideBuilder};
-use ignore::types::{FileTypeDef, Types, TypesBuilder};
-use ignore;
-use printer::{ColorSpecs, Printer};
-use unescape::{escape, unescape};
-use worker::{Worker, WorkerBuilder};
-
 use config;
 use logger::Logger;
+use messages::{set_messages, set_ignore_messages};
+use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder};
+use subject::SubjectBuilder;
+use unescape::{escape, unescape};
 use Result;
 
-/// `Args` are transformed/normalized from `ArgMatches`.
-#[derive(Debug)]
-pub struct Args {
+/// The command that ripgrep should execute based on the command line
+/// configuration.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Command {
+    /// Search using exactly one thread.
+    Search,
+    /// Search using possibly many threads.
+    SearchParallel,
+    /// The command line parameters suggest that a search should occur, but
+    /// ripgrep knows that a match can never be found (e.g., no given patterns
+    /// or --max-count=0).
+    SearchNever,
+    /// Show the files that would be searched, but don't actually search them,
+    /// and use exactly one thread.
+    Files,
+    /// Show the files that would be searched, but don't actually search them,
+    /// and perform directory traversal using possibly many threads.
+    FilesParallel,
+    /// List all file type definitions configured, including the default file
+    /// types and any additional file types added to the command line.
+    Types,
+}
+
+impl Command {
+    /// Returns true if and only if this command requires executing a search.
+    fn is_search(&self) -> bool {
+        use self::Command::*;
+
+        match *self {
+            Search | SearchParallel => true,
+            SearchNever | Files | FilesParallel | Types => false,
+        }
+    }
+}
+
+/// The primary configuration object used throughout ripgrep. It provides a
+/// high-level convenient interface to the provided command line arguments.
+///
+/// An `Args` object is cheap to clone and can be used from multiple threads
+/// simultaneously.
+#[derive(Clone, Debug)]
+pub struct Args(Arc<ArgsImp>);
+
+#[derive(Clone, Debug)]
+struct ArgsImp {
+    /// Mid-to-low level routines for extracting CLI arguments.
+    matches: ArgMatches,
+    /// The patterns provided at the command line and/or via the -f/--file
+    /// flag. This may be empty.
+    patterns: Vec<String>,
+    /// A matcher built from the patterns.
+    ///
+    /// It's important that this is only built once, since building this goes
+    /// through regex compilation and various types of analyses. That is, if
+    /// you need many of these (one per thread, for example), it is better to
+    /// build it once and then clone it.
+    matcher: PatternMatcher,
+    /// The paths provided at the command line. This is guaranteed to be
+    /// non-empty. (If no paths are provided, then a default path is created.)
     paths: Vec<PathBuf>,
-    after_context: usize,
-    before_context: usize,
-    byte_offset: bool,
-    can_match: bool,
-    color_choice: termcolor::ColorChoice,
-    colors: ColorSpecs,
-    column: bool,
-    context_separator: Vec<u8>,
-    count: bool,
-    count_matches: bool,
-    encoding: Option<&'static Encoding>,
-    files_with_matches: bool,
-    files_without_matches: bool,
-    eol: u8,
-    files: bool,
-    follow: bool,
-    glob_overrides: Override,
-    grep: Grep,
-    heading: bool,
-    hidden: bool,
-    ignore_files: Vec<PathBuf>,
-    invert_match: bool,
-    line_number: bool,
-    line_per_match: bool,
-    max_columns: Option<usize>,
-    max_count: Option<u64>,
-    max_depth: Option<usize>,
-    max_filesize: Option<u64>,
-    mmap: bool,
-    no_ignore: bool,
-    no_ignore_global: bool,
-    no_ignore_messages: bool,
-    no_ignore_parent: bool,
-    no_ignore_vcs: bool,
-    no_messages: bool,
-    null: bool,
-    only_matching: bool,
-    path_separator: Option<u8>,
-    quiet: bool,
-    quiet_matched: QuietMatched,
-    replace: Option<Vec<u8>>,
-    sort_files: bool,
-    stdout_handle: Option<same_file::Handle>,
-    text: bool,
-    threads: usize,
-    type_list: bool,
-    types: Types,
-    with_filename: bool,
-    search_zip_files: bool,
-    preprocessor: Option<PathBuf>,
-    stats: bool
+    /// True if and only if `paths` had to be populated with a single
+    /// default path.
+    using_default_path: bool,
 }
 
 impl Args {
@@ -100,46 +128,262 @@ impl Args {
         // trying to parse config files. If a config file exists and has
         // arguments, then we re-parse argv, otherwise we just use the matches
         // we have here.
-        let early_matches = ArgMatches(app::app().get_matches());
+        let early_matches = ArgMatches::new(app::app().get_matches());
+        set_messages(!early_matches.is_present("no-messages"));
+        set_ignore_messages(!early_matches.is_present("no-ignore-messages"));
 
         if let Err(err) = Logger::init() {
-            errored!("failed to initialize logger: {}", err);
+            return Err(format!("failed to initialize logger: {}", err).into());
         }
-        if early_matches.is_present("debug") {
+        if early_matches.is_present("trace") {
+            log::set_max_level(log::LevelFilter::Trace);
+        } else if early_matches.is_present("debug") {
             log::set_max_level(log::LevelFilter::Debug);
         } else {
             log::set_max_level(log::LevelFilter::Warn);
         }
 
-        let matches = Args::matches(early_matches);
+        let matches = early_matches.reconfigure();
         // The logging level may have changed if we brought in additional
         // arguments from a configuration file, so recheck it and set the log
         // level as appropriate.
-        if matches.is_present("debug") {
+        if matches.is_present("trace") {
+            log::set_max_level(log::LevelFilter::Trace);
+        } else if matches.is_present("debug") {
             log::set_max_level(log::LevelFilter::Debug);
         } else {
             log::set_max_level(log::LevelFilter::Warn);
         }
+        set_messages(!matches.is_present("no-messages"));
+        set_ignore_messages(!matches.is_present("no-ignore-messages"));
         matches.to_args()
     }
 
-    /// Run clap and return the matches. If clap determines a problem with the
-    /// user provided arguments (or if --help or --version are given), then an
-    /// error/usage/version will be printed and the process will exit.
+    /// Return direct access to command line arguments.
+    fn matches(&self) -> &ArgMatches {
+        &self.0.matches
+    }
+
+    /// Return the patterns found in the command line arguments. This includes
+    /// patterns read via the -f/--file flags.
+    fn patterns(&self) -> &[String] {
+        &self.0.patterns
+    }
+
+    /// Return the matcher built from the patterns.
+    fn matcher(&self) -> &PatternMatcher {
+        &self.0.matcher
+    }
+
+    /// Return the paths found in the command line arguments. This is
+    /// guaranteed to be non-empty. In the case where no explicit arguments are
+    /// provided, a single default path is provided automatically.
+    fn paths(&self) -> &[PathBuf] {
+        &self.0.paths
+    }
+
+    /// Returns true if and only if `paths` had to be populated with a default
+    /// path, which occurs only when no paths were given as command line
+    /// arguments.
+    fn using_default_path(&self) -> bool {
+        self.0.using_default_path
+    }
+
+    /// Return the printer that should be used for formatting the output of
+    /// search results.
+    ///
+    /// The returned printer will write results to the given writer.
+    fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
+        match self.matches().output_kind() {
+            OutputKind::Standard => {
+                let separator_search = self.command()? == Command::Search;
+                self.matches()
+                    .printer_standard(self.paths(), wtr, separator_search)
+                    .map(Printer::Standard)
+            }
+            OutputKind::Summary => {
+                self.matches()
+                    .printer_summary(self.paths(), wtr)
+                    .map(Printer::Summary)
+            }
+            OutputKind::JSON => {
+                self.matches()
+                    .printer_json(wtr)
+                    .map(Printer::JSON)
+            }
+        }
+    }
+}
+
+/// High level public routines for building data structures used by ripgrep
+/// from command line arguments.
+impl Args {
+    /// Create a new buffer writer for multi-threaded printing with color
+    /// support.
+    pub fn buffer_writer(&self) -> Result<BufferWriter> {
+        let mut wtr = BufferWriter::stdout(self.matches().color_choice());
+        wtr.separator(self.matches().file_separator()?);
+        Ok(wtr)
+    }
+
+    /// Return the high-level command that ripgrep should run.
+    pub fn command(&self) -> Result<Command> {
+        let is_one_search = self.matches().is_one_search(self.paths());
+        let threads = self.matches().threads()?;
+        let one_thread = is_one_search || threads == 1;
+
+        Ok(if self.matches().is_present("type-list") {
+            Command::Types
+        } else if self.matches().is_present("files") {
+            if one_thread {
+                Command::Files
+            } else {
+                Command::FilesParallel
+            }
+        } else if self.matches().can_never_match(self.patterns()) {
+            Command::SearchNever
+        } else if one_thread {
+            Command::Search
+        } else {
+            Command::SearchParallel
+        })
+    }
+
+    /// Build a path printer that can be used for printing just file paths,
+    /// with optional color support.
+    ///
+    /// The printer will print paths to the given writer.
+    pub fn path_printer<W: WriteColor>(
+        &self,
+        wtr: W,
+    ) -> Result<PathPrinter<W>> {
+        let mut builder = PathPrinterBuilder::new();
+        builder
+            .color_specs(self.matches().color_specs()?)
+            .separator(self.matches().path_separator()?)
+            .terminator(self.matches().path_terminator().unwrap_or(b'\n'));
+        Ok(builder.build(wtr))
+    }
+
+    /// Returns true if and only if the search should quit after finding the
+    /// first match.
+    pub fn quit_after_match(&self) -> Result<bool> {
+        Ok(self.matches().is_present("quiet") && self.stats()?.is_none())
+    }
+
+    /// Build a worker for executing searches.
+    ///
+    /// Search results are written to the given writer.
+    pub fn search_worker<W: WriteColor>(
+        &self,
+        wtr: W,
+    ) -> Result<SearchWorker<W>> {
+        let matcher = self.matcher().clone();
+        let printer = self.printer(wtr)?;
+        let searcher = self.matches().searcher(self.paths())?;
+        let mut builder = SearchWorkerBuilder::new();
+        builder
+            .json_stats(self.matches().is_present("json"))
+            .preprocessor(self.matches().preprocessor