summaryrefslogtreecommitdiffstats
path: root/crates/core/args.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/core/args.rs')
-rw-r--r--crates/core/args.rs1805
1 files changed, 1805 insertions, 0 deletions
diff --git a/crates/core/args.rs b/crates/core/args.rs
new file mode 100644
index 00000000..989f95c2
--- /dev/null
+++ b/crates/core/args.rs
@@ -0,0 +1,1805 @@
+use std::cmp;
+use std::env;
+use std::ffi::{OsStr, OsString};
+use std::fs;
+use std::io::{self, Write};
+use std::path::{Path, PathBuf};
+use std::process;
+use std::sync::Arc;
+use std::time::SystemTime;
+
+use clap;
+use grep::cli;
+use grep::matcher::LineTerminator;
+#[cfg(feature = "pcre2")]
+use grep::pcre2::{
+ RegexMatcher as PCRE2RegexMatcher,
+ RegexMatcherBuilder as PCRE2RegexMatcherBuilder,
+};
+use grep::printer::{
+ default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder,
+ Stats, Summary, SummaryBuilder, SummaryKind, JSON,
+};
+use grep::regex::{
+ RegexMatcher as RustRegexMatcher,
+ RegexMatcherBuilder as RustRegexMatcherBuilder,
+};
+use grep::searcher::{
+ BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder,
+};
+use ignore::overrides::{Override, OverrideBuilder};
+use ignore::types::{FileTypeDef, Types, TypesBuilder};
+use ignore::{Walk, WalkBuilder, WalkParallel};
+use log;
+use num_cpus;
+use regex;
+use termcolor::{BufferWriter, ColorChoice, WriteColor};
+
+use crate::app;
+use crate::config;
+use crate::logger::Logger;
+use crate::messages::{set_ignore_messages, set_messages};
+use crate::path_printer::{PathPrinter, PathPrinterBuilder};
+use crate::search::{
+ PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
+};
+use crate::subject::SubjectBuilder;
+use crate::Result;
+
+/// The command that ripgrep should execute based on the command line
+/// configuration.
+///
+/// `Args::command` picks exactly one of these variants for a given set of
+/// parsed arguments.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Command {
+ /// Search using exactly one thread.
+ Search,
+ /// Search using possibly many threads.
+ SearchParallel,
+ /// The command line parameters suggest that a search should occur, but
+ /// ripgrep knows that a match can never be found (e.g., no given patterns
+ /// or --max-count=0).
+ SearchNever,
+ /// Show the files that would be searched, but don't actually search them,
+ /// and use exactly one thread. Selected when the `files` flag is present.
+ Files,
+ /// Show the files that would be searched, but don't actually search them,
+ /// and perform directory traversal using possibly many threads.
+ FilesParallel,
+ /// List all file type definitions configured, including the default file
+ /// types and any additional file types added to the command line.
+ /// Selected when the `type-list` flag is present.
+ Types,
+ /// Print the version of PCRE2 in use. Selected when the `pcre2-version`
+ /// flag is present; it takes priority over every other command.
+ PCRE2Version,
+}
+
+impl Command {
+    /// Reports whether this command actually runs a search.
+    ///
+    /// Only `Search` and `SearchParallel` do. Every other command, including
+    /// `SearchNever`, yields false. The match is written out exhaustively so
+    /// that adding a variant forces a decision here.
+    fn is_search(&self) -> bool {
+        match *self {
+            Command::Search | Command::SearchParallel => true,
+            Command::SearchNever
+            | Command::Files
+            | Command::FilesParallel
+            | Command::Types
+            | Command::PCRE2Version => false,
+        }
+    }
+}
+
+/// The primary configuration object used throughout ripgrep. It provides a
+/// high-level convenient interface to the provided command line arguments.
+///
+/// An `Args` object is cheap to clone and can be used from multiple threads
+/// simultaneously. All of its state lives in a single `ArgsImp` behind an
+/// `Arc`, so cloning shares that state instead of copying it.
+#[derive(Clone, Debug)]
+pub struct Args(Arc<ArgsImp>);
+
+/// The shared state behind an `Args` handle. It is assembled once by
+/// `ArgMatches::to_args`.
+#[derive(Clone, Debug)]
+struct ArgsImp {
+ /// Mid-to-low level routines for extracting CLI arguments.
+ matches: ArgMatches,
+ /// The patterns provided at the command line and/or via the -f/--file
+ /// flag. This may be empty.
+ patterns: Vec<String>,
+ /// A matcher built from the patterns.
+ ///
+ /// It's important that this is only built once, since building this goes
+ /// through regex compilation and various types of analyses. That is, if
+ /// you need many of these (one per thread, for example), it is better to
+ /// build it once and then clone it.
+ matcher: PatternMatcher,
+ /// The paths provided at the command line. This is guaranteed to be
+ /// non-empty. (If no paths are provided, then a default path is created.)
+ paths: Vec<PathBuf>,
+ /// True if and only if `paths` had to be populated with a single
+ /// default path.
+ using_default_path: bool,
+}
+
+impl Args {
+    /// Parse the command line arguments for this process.
+    ///
+    /// Parsing happens in up to two rounds. The first round looks at argv
+    /// only; its result configures messages and the log level while
+    /// `ArgMatches::reconfigure` decides whether arguments from a config file
+    /// must be merged in. The result of `reconfigure` is then used to set the
+    /// same global settings again, and to build the final `Args`.
+    ///
+    /// If a CLI usage error occurred, then exit the process and print a usage
+    /// or error message. Similarly, if the user requested the version of
+    /// ripgrep, then print the version and exit.
+    ///
+    /// This also initializes the global logger. An error is returned if that
+    /// initialization fails.
+    pub fn parse() -> Result<Args> {
+        // Translate the --trace/--debug flags into the global log level.
+        // --trace wins over --debug; with neither present, the level is Warn.
+        fn apply_log_level(matches: &ArgMatches) {
+            let level = if matches.is_present("trace") {
+                log::LevelFilter::Trace
+            } else if matches.is_present("debug") {
+                log::LevelFilter::Debug
+            } else {
+                log::LevelFilter::Warn
+            };
+            log::set_max_level(level);
+        }
+
+        // Round one: only what was typed on the command line. Nothing from
+        // a config file is included yet.
+        let cli_only = ArgMatches::new(clap_matches(env::args_os())?);
+        set_messages(!cli_only.is_present("no-messages"));
+        set_ignore_messages(!cli_only.is_present("no-ignore-messages"));
+
+        if let Err(err) = Logger::init() {
+            return Err(format!("failed to initialize logger: {}", err).into());
+        }
+        apply_log_level(&cli_only);
+
+        // Round two: a config file may have contributed extra arguments, so
+        // every global setting derived from flags is recomputed.
+        let matches = cli_only.reconfigure()?;
+        apply_log_level(&matches);
+        set_messages(!matches.is_present("no-messages"));
+        set_ignore_messages(!matches.is_present("no-ignore-messages"));
+        matches.to_args()
+    }
+
+ /// Return direct access to the parsed command line arguments, i.e., the
+ /// `ArgMatches` value stored inside this `Args`.
+ fn matches(&self) -> &ArgMatches {
+ &self.0.matches
+ }
+
+ /// Return the patterns found in the command line arguments. This includes
+ /// patterns read via the -f/--file flags. The returned slice may be empty.
+ fn patterns(&self) -> &[String] {
+ &self.0.patterns
+ }
+
+ /// Return the matcher that was built from the patterns.
+ ///
+ /// This is a borrow of the single matcher stored in this `Args`; callers
+ /// that need an owned matcher clone it.
+ fn matcher(&self) -> &PatternMatcher {
+ &self.0.matcher
+ }
+
+ /// Return the paths found in the command line arguments. This is
+ /// guaranteed to be non-empty. In the case where no explicit arguments are
+ /// provided, a single default path is provided automatically (see
+ /// `using_default_path`).
+ fn paths(&self) -> &[PathBuf] {
+ &self.0.paths
+ }
+
+ /// Returns true if and only if `paths` had to be populated with a default
+ /// path, which occurs only when no paths were given as command line
+ /// arguments. `subject_builder` uses this to decide whether to strip the
+ /// dot prefix.
+ fn using_default_path(&self) -> bool {
+ self.0.using_default_path
+ }
+
+    /// Build the printer used to format search results, writing to `wtr`.
+    ///
+    /// The printer variant follows the output kind reported by the parsed
+    /// arguments. For the standard printer, the printer itself is asked to
+    /// emit the separator between search results only when the command is
+    /// `Command::Search`.
+    fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
+        let matches = self.matches();
+        match matches.output_kind() {
+            OutputKind::JSON => matches.printer_json(wtr).map(Printer::JSON),
+            OutputKind::Summary => {
+                let summary = matches.printer_summary(self.paths(), wtr)?;
+                Ok(Printer::Summary(summary))
+            }
+            OutputKind::Standard => {
+                let separator_search = self.command()? == Command::Search;
+                let standard = matches.printer_standard(
+                    self.paths(),
+                    wtr,
+                    separator_search,
+                )?;
+                Ok(Printer::Standard(standard))
+            }
+        }
+    }
+}
+
+/// High level public routines for building data structures used by ripgrep
+/// from command line arguments.
+impl Args {
+    /// Create a new buffer writer for multi-threaded printing with color
+    /// support.
+    ///
+    /// The writer targets stdout, uses the color choice from the command
+    /// line, and is given the configured file separator. An error is
+    /// returned if the file separator cannot be determined.
+    pub fn buffer_writer(&self) -> Result<BufferWriter> {
+        let matches = self.matches();
+        let choice = matches.color_choice();
+        let mut bufwtr = BufferWriter::stdout(choice);
+        let separator = matches.file_separator()?;
+        bufwtr.separator(separator);
+        Ok(bufwtr)
+    }
+
+    /// Return the high-level command that ripgrep should run.
+    ///
+    /// Flags are checked in priority order: `pcre2-version`, `type-list`,
+    /// `files`, then "can never match", and finally a regular search. The
+    /// single-threaded variants are chosen when there is only one thing to
+    /// search or when the thread count is 1.
+    ///
+    /// An error is returned if the thread count cannot be determined.
+    pub fn command(&self) -> Result<Command> {
+        let matches = self.matches();
+        let is_one_search = matches.is_one_search(self.paths());
+        let threads = matches.threads()?;
+        let one_thread = is_one_search || threads == 1;
+
+        if matches.is_present("pcre2-version") {
+            return Ok(Command::PCRE2Version);
+        }
+        if matches.is_present("type-list") {
+            return Ok(Command::Types);
+        }
+        if matches.is_present("files") {
+            return Ok(if one_thread {
+                Command::Files
+            } else {
+                Command::FilesParallel
+            });
+        }
+        if matches.can_never_match(self.patterns()) {
+            return Ok(Command::SearchNever);
+        }
+        Ok(if one_thread { Command::Search } else { Command::SearchParallel })
+    }
+
+    /// Build a path printer that can be used for printing just file paths,
+    /// with optional color support.
+    ///
+    /// The printer will print paths to the given writer. When no path
+    /// terminator is configured, a line feed is used.
+    pub fn path_printer<W: WriteColor>(
+        &self,
+        wtr: W,
+    ) -> Result<PathPrinter<W>> {
+        let matches = self.matches();
+        let specs = matches.color_specs()?;
+        let separator = matches.path_separator()?;
+        let terminator = matches.path_terminator().unwrap_or(b'\n');
+
+        let mut builder = PathPrinterBuilder::new();
+        builder.color_specs(specs).separator(separator).terminator(terminator);
+        Ok(builder.build(wtr))
+    }
+
+ /// Returns true if and only if ripgrep should be "quiet", which is the
+ /// case exactly when the `quiet` flag is present.
+ pub fn quiet(&self) -> bool {
+ self.matches().is_present("quiet")
+ }
+
+    /// Returns true if and only if the search should quit after finding the
+    /// first match.
+    ///
+    /// That is the case when the `quiet` flag is present and no statistics
+    /// are being tracked (see `stats`).
+    pub fn quit_after_match(&self) -> Result<bool> {
+        if !self.matches().is_present("quiet") {
+            return Ok(false);
+        }
+        Ok(self.stats()?.is_none())
+    }
+
+    /// Build a worker for executing searches.
+    ///
+    /// Search results are written to the given writer. The worker receives
+    /// its own clone of the matcher, plus a freshly built printer and
+    /// searcher. An error is returned if any of those pieces, or the
+    /// preprocessor globs, cannot be built.
+    pub fn search_worker<W: WriteColor>(
+        &self,
+        wtr: W,
+    ) -> Result<SearchWorker<W>> {
+        let args = self.matches();
+        let matcher = self.matcher().clone();
+        let printer = self.printer(wtr)?;
+        let searcher = args.searcher(self.paths())?;
+
+        let mut worker = SearchWorkerBuilder::new();
+        worker.json_stats(args.is_present("json"));
+        worker.preprocessor(args.preprocessor());
+        worker.preprocessor_globs(args.preprocessor_globs()?);
+        worker.search_zip(args.is_present("search-zip"));
+        worker.binary_detection_implicit(args.binary_detection_implicit());
+        worker.binary_detection_explicit(args.binary_detection_explicit());
+        Ok(worker.build(matcher, searcher, printer))
+    }
+
+    /// Returns a zero value for tracking statistics if and only if it has been
+    /// requested.
+    ///
+    /// Statistics are tracked only when the selected command performs a
+    /// search and the parsed arguments ask for them. The standard and
+    /// summary printers are configured from the same `ArgMatches::stats`
+    /// setting.
+    pub fn stats(&self) -> Result<Option<Stats>> {
+        let tracking = self.command()?.is_search() && self.matches().stats();
+        if tracking {
+            Ok(Some(Stats::new()))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Return a builder for constructing subjects. A subject represents a
+    /// single unit of something to search. Typically, this corresponds to a
+    /// file or a stream such as stdin.
+    ///
+    /// Dot-prefix stripping is enabled on the builder exactly when the
+    /// default path is in use.
+    pub fn subject_builder(&self) -> SubjectBuilder {
+        let strip = self.using_default_path();
+        let mut subjects = SubjectBuilder::new();
+        subjects.strip_dot_prefix(strip);
+        subjects
+    }
+
+    /// Return a writer to stdout that enables color support based on the
+    /// command line configuration.
+    ///
+    /// The `line-buffered` flag selects a line buffered writer and takes
+    /// priority over `block-buffered`, which selects a block buffered
+    /// writer. With neither flag, `cli::stdout` chooses the writer.
+    pub fn stdout(&self) -> cli::StandardStream {
+        let matches = self.matches();
+        let color = matches.color_choice();
+        if matches.is_present("line-buffered") {
+            return cli::stdout_buffered_line(color);
+        }
+        if matches.is_present("block-buffered") {
+            return cli::stdout_buffered_block(color);
+        }
+        cli::stdout(color)
+    }
+
+    /// Return an owned copy of the file type definitions known to the file
+    /// type matcher built from the command line.
+    ///
+    /// If there was a problem reading and parsing the type definitions, then
+    /// this returns an error.
+    pub fn type_defs(&self) -> Result<Vec<FileTypeDef>> {
+        let types = self.matches().types()?;
+        let defs = types.definitions().to_vec();
+        Ok(defs)
+    }
+
+    /// Return a walker that never uses additional threads.
+    ///
+    /// An error is returned if the walk builder cannot be configured from
+    /// the command line arguments.
+    pub fn walker(&self) -> Result<Walk> {
+        let builder = self.matches().walker_builder(self.paths())?;
+        Ok(builder.build())
+    }
+
+    /// Return a parallel walker that may use additional threads.
+    ///
+    /// This is configured exactly like `walker`, but is built with
+    /// `build_parallel`. An error is returned if the walk builder cannot be
+    /// configured from the command line arguments.
+    pub fn walker_parallel(&self) -> Result<WalkParallel> {
+        let builder = self.matches().walker_builder(self.paths())?;
+        Ok(builder.build_parallel())
+    }
+}
+
+/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to
+/// the parsed arguments.
+///
+/// The wrapped value is the raw clap parse result; the methods on this type
+/// interpret individual flags and build higher level objects from them.
+#[derive(Clone, Debug)]
+struct ArgMatches(clap::ArgMatches<'static>);
+
+/// The output format. Generally, this corresponds to the printer that ripgrep
+/// uses to show search results.
+///
+/// `Args::printer` maps each variant to the printer of the same name.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum OutputKind {
+ /// Classic grep-like or ack-like format.
+ Standard,
+ /// Show matching files and possibly the number of matches in each file.
+ Summary,
+ /// Emit match information in the JSON Lines format.
+ JSON,
+}
+
+/// The sort criteria, if present.
+///
+/// "No sorting" is represented by `SortByKind::None` rather than by the
+/// absence of a `SortBy` value (see `SortBy::none`).
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+struct SortBy {
+ /// Whether to reverse the sort criteria (i.e., descending order).
+ reverse: bool,
+ /// The actual sorting criteria.
+ kind: SortByKind,
+}
+
+/// The attribute that results are sorted by. `SortByKind::new` maps the
+/// strings "none", "path", "modified", "accessed" and "created" to these
+/// variants.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum SortByKind {
+ /// No sorting at all.
+ None,
+ /// Sort by path.
+ Path,
+ /// Sort by last modified time.
+ LastModified,
+ /// Sort by last accessed time.
+ LastAccessed,
+ /// Sort by creation time.
+ Created,
+}
+
+impl SortBy {
+    /// Sort by `kind` in ascending order.
+    fn asc(kind: SortByKind) -> SortBy {
+        SortBy { kind, reverse: false }
+    }
+
+    /// Sort by `kind` in descending order.
+    fn desc(kind: SortByKind) -> SortBy {
+        SortBy { kind, reverse: true }
+    }
+
+    /// The criteria that performs no sorting at all.
+    fn none() -> SortBy {
+        SortBy::asc(SortByKind::None)
+    }
+
+    /// Try to check that the sorting criteria selected is actually supported.
+    /// If it isn't, then an error is returned.
+    ///
+    /// For the time based criteria, the check reads the corresponding
+    /// timestamp from the metadata of the current executable. `None` and
+    /// `Path` need no check and always succeed.
+    fn check(&self) -> Result<()> {
+        // The metadata accessor that the selected criteria depends on.
+        let probe: fn(&fs::Metadata) -> io::Result<SystemTime> =
+            match self.kind {
+                SortByKind::None | SortByKind::Path => return Ok(()),
+                SortByKind::LastModified => fs::Metadata::modified,
+                SortByKind::LastAccessed => fs::Metadata::accessed,
+                SortByKind::Created => fs::Metadata::created,
+            };
+        probe(&env::current_exe()?.metadata()?)?;
+        Ok(())
+    }
+
+    /// Install the comparator matching this criteria on the given walk
+    /// builder. `SortByKind::None` leaves the builder untouched.
+    fn configure_walk_builder(self, builder: &mut WalkBuilder) {
+        // This isn't entirely optimal. In particular, we will wind up issuing
+        // a stat for many files redundantly. Aside from having potentially
+        // inconsistent results with respect to sorting, this is also slow.
+        // We could fix this here at the expense of memory by caching stat
+        // calls. A better fix would be to find a way to push this down into
+        // directory traversal itself, but that's a somewhat nasty change.
+        let reverse = self.reverse;
+        match self.kind {
+            SortByKind::None => {}
+            SortByKind::Path if reverse => {
+                builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
+            }
+            SortByKind::Path => {
+                builder.sort_by_file_name(|a, b| a.cmp(b));
+            }
+            SortByKind::LastModified => {
+                builder.sort_by_file_path(move |a, b| {
+                    sort_by_metadata_time(a, b, reverse, |md| md.modified())
+                });
+            }
+            SortByKind::LastAccessed => {
+                builder.sort_by_file_path(move |a, b| {
+                    sort_by_metadata_time(a, b, reverse, |md| md.accessed())
+                });
+            }
+            SortByKind::Created => {
+                builder.sort_by_file_path(move |a, b| {
+                    sort_by_metadata_time(a, b, reverse, |md| md.created())
+                });
+            }
+        }
+    }
+}
+
+impl SortByKind {
+    /// Map the textual name of a sort criteria to its `SortByKind`.
+    ///
+    /// Recognized names are "none", "path", "modified", "accessed" and
+    /// "created". Any other string is treated like "none".
+    fn new(kind: &str) -> SortByKind {
+        match kind {
+            "path" => SortByKind::Path,
+            "modified" => SortByKind::LastModified,
+            "accessed" => SortByKind::LastAccessed,
+            "created" => SortByKind::Created,
+            // "none" and every unrecognized name both disable sorting.
+            _ => SortByKind::None,
+        }
+    }
+}
+
+/// Encoding mode the searcher will use.
+///
+/// `ArgMatches::searcher` applies it as follows: `Some` sets the encoding on
+/// the searcher builder, `Auto` leaves the builder's default in place and
+/// `Disabled` turns BOM sniffing off.
+#[derive(Clone, Debug)]
+enum EncodingMode {
+ /// Use an explicit encoding forcefully, but let BOM sniffing override it.
+ Some(Encoding),
+ /// Use only BOM sniffing to auto-detect an encoding.
+ Auto,
+ /// Use no explicit encoding and disable all BOM sniffing. This will
+ /// always result in searching the raw bytes, regardless of their
+ /// true encoding.
+ Disabled,
+}
+
+impl EncodingMode {
+    /// Checks if an explicit encoding has been set. Returns false for
+    /// automatic BOM sniffing and no sniffing.
+    ///
+    /// This is only used to determine whether PCRE2 needs to have its own
+    /// UTF-8 checking enabled. If we have an explicit encoding set, then
+    /// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
+    /// Otherwise, we have no such guarantee, and must enable PCRE2's UTF-8
+    /// check.
+    #[cfg(feature = "pcre2")]
+    fn has_explicit_encoding(&self) -> bool {
+        match *self {
+            EncodingMode::Some(_) => true,
+            EncodingMode::Auto | EncodingMode::Disabled => false,
+        }
+    }
+}
+
+impl ArgMatches {
+ /// Create an ArgMatches from clap's parse result. The given value is
+ /// wrapped as is; no flags are interpreted here.
+ fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
+ ArgMatches(clap_matches)
+ }
+
+    /// Run clap and return the matches using a config file if present. If clap
+    /// determines a problem with the user provided arguments (or if --help or
+    /// --version are given), then an error/usage/version will be printed and
+    /// the process will exit.
+    ///
+    /// If --no-config is present, or if there are no additional arguments
+    /// from the environment (e.g., a config file), then the given matches
+    /// are returned as is. Otherwise argv is rebuilt as: the binary name,
+    /// then the config arguments, then the remaining CLI arguments.
+    fn reconfigure(self) -> Result<ArgMatches> {
+        // If the end user says no config, then respect it.
+        if self.is_present("no-config") {
+            log::debug!(
+                "not reading config files because --no-config is present"
+            );
+            return Ok(self);
+        }
+        // Arguments from a config file go before the ones given on the CLI.
+        let config_args = config::args();
+        if config_args.is_empty() {
+            return Ok(self);
+        }
+        let mut cliargs = env::args_os();
+        let mut args: Vec<OsString> =
+            Vec::with_capacity(config_args.len() + 1);
+        // The binary name (if there is one) must stay in the first position.
+        args.extend(cliargs.next());
+        args.extend(config_args);
+        args.extend(cliargs);
+        log::debug!("final argv: {:?}", args);
+        Ok(ArgMatches(clap_matches(args)?))
+    }
+
+    /// Convert the result of parsing CLI arguments into ripgrep's higher level
+    /// configuration structure.
+    ///
+    /// The patterns, the matcher and the paths are computed once here and
+    /// stored in the returned `Args`. When no paths were given, the default
+    /// path is used and `using_default_path` is recorded as true.
+    fn to_args(self) -> Result<Args> {
+        // We compute these once since they could be large.
+        let patterns = self.patterns()?;
+        let matcher = self.matcher(&patterns)?;
+        let mut paths = self.paths();
+        let using_default_path = paths.is_empty();
+        if using_default_path {
+            paths.push(self.path_default());
+        }
+        let imp = ArgsImp {
+            matches: self,
+            patterns,
+            matcher,
+            paths,
+            using_default_path,
+        };
+        Ok(Args(Arc::new(imp)))
+    }
+}
+
+/// High level routines for converting command line arguments into various
+/// data structures used by ripgrep.
+///
+/// Methods are sorted alphabetically.
+impl ArgMatches {
+    /// Return the matcher that should be used for searching.
+    ///
+    /// With the `pcre2` flag, only PCRE2 is tried. With `auto-hybrid-regex`,
+    /// the default regex engine is tried first and PCRE2 is the fallback; if
+    /// both fail, the returned error carries both messages. Otherwise only
+    /// the default regex engine is tried, and its error text is passed
+    /// through `suggest_pcre2`.
+    ///
+    /// If there was a problem building the matcher (e.g., a syntax error),
+    /// then this returns an error.
+    #[cfg(feature = "pcre2")]
+    fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> {
+        if self.is_present("pcre2") {
+            return self.matcher_pcre2(patterns).map(PatternMatcher::PCRE2);
+        }
+        if !self.is_present("auto-hybrid-regex") {
+            return match self.matcher_rust(patterns) {
+                Ok(matcher) => Ok(PatternMatcher::RustRegex(matcher)),
+                Err(err) => Err(From::from(suggest_pcre2(err.to_string()))),
+            };
+        }
+
+        // Hybrid mode: default engine first, PCRE2 second.
+        let rust_err = match self.matcher_rust(patterns) {
+            Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)),
+            Err(err) => err,
+        };
+        log::debug!(
+            "error building Rust regex in hybrid mode:\n{}",
+            rust_err,
+        );
+        let pcre_err = match self.matcher_pcre2(patterns) {
+            Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)),
+            Err(err) => err,
+        };
+        // A horizontal rule that frames the default engine's error message.
+        let rule = "~".repeat(79);
+        Err(From::from(format!(
+            "regex could not be compiled with either the default regex \
+             engine or with PCRE2.\n\n\
+             default regex engine error:\n{}\n{}\n{}\n\n\
+             PCRE2 regex engine error:\n{}",
+            rule, rust_err, rule, pcre_err,
+        )))
+    }
+
+    /// Return the matcher that should be used for searching.
+    ///
+    /// This build has no PCRE2 support, so asking for it via the `pcre2`
+    /// flag is an error. Otherwise the default regex engine is used.
+    ///
+    /// If there was a problem building the matcher (e.g., a syntax error),
+    /// then this returns an error.
+    #[cfg(not(feature = "pcre2"))]
+    fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> {
+        if !self.is_present("pcre2") {
+            return self.matcher_rust(patterns).map(PatternMatcher::RustRegex);
+        }
+        Err(From::from("PCRE2 is not available in this build of ripgrep"))
+    }
+
+    /// Build a matcher using Rust's regex engine.
+    ///
+    /// With the `fixed-strings` flag the patterns are built as literals.
+    /// Otherwise they are joined with `|` into a single pattern.
+    ///
+    /// If there was a problem building the matcher (such as a regex syntax
+    /// error), then an error is returned. The error text is passed through
+    /// `suggest_multiline` first.
+    fn matcher_rust(&self, patterns: &[String]) -> Result<RustRegexMatcher> {
+        let crlf = self.is_present("crlf");
+        let mut builder = RustRegexMatcherBuilder::new();
+        builder
+            .case_smart(self.case_smart())
+            .case_insensitive(self.case_insensitive())
+            .multi_line(true)
+            .unicode(self.unicode())
+            .octal(false)
+            .word(self.is_present("word-regexp"));
+        if !self.is_present("multiline") {
+            builder.line_terminator(Some(b'\n')).dot_matches_new_line(false);
+            if crlf {
+                builder.crlf(true);
+            }
+            // We don't need to set this in multiline mode since multiline
+            // matchers don't use optimizations related to line terminators.
+            // Moreover, a multiline regex used with --null-data should
+            // be allowed to match NUL bytes explicitly, which this would
+            // otherwise forbid.
+            if self.is_present("null-data") {
+                builder.line_terminator(Some(b'\x00'));
+            }
+        } else {
+            builder.dot_matches_new_line(self.is_present("multiline-dotall"));
+            if crlf {
+                builder.crlf(true).line_terminator(None);
+            }
+        }
+        if let Some(limit) = self.regex_size_limit()? {
+            builder.size_limit(limit);
+        }
+        if let Some(limit) = self.dfa_size_limit()? {
+            builder.dfa_size_limit(limit);
+        }
+        let built = if self.is_present("fixed-strings") {
+            builder.build_literals(patterns)
+        } else {
+            builder.build(&patterns.join("|"))
+        };
+        built.map_err(|err| From::from(suggest_multiline(err.to_string())))
+    }
+
+    /// Build a matcher using PCRE2.
+    ///
+    /// The patterns are joined with `|` into a single pattern.
+    ///
+    /// If there was a problem building the matcher (such as a regex syntax
+    /// error), then an error is returned.
+    #[cfg(feature = "pcre2")]
+    fn matcher_pcre2(&self, patterns: &[String]) -> Result<PCRE2RegexMatcher> {
+        let mut builder = PCRE2RegexMatcherBuilder::new();
+        builder
+            .case_smart(self.case_smart())
+            .caseless(self.case_insensitive())
+            .multi_line(true)
+            .word(self.is_present("word-regexp"));
+        // For whatever reason, the JIT craps out during regex compilation with
+        // a "no more memory" error on 32 bit systems. So don't use it there.
+        if cfg!(target_pointer_width = "64") {
+            // The PCRE2 docs say that 32KB is the default, and that 1MB
+            // should be big enough for anything. But let's crank it to
+            // 10MB.
+            let jit_stack_size = 10 * (1 << 20);
+            builder
+                .jit_if_available(true)
+                .max_jit_stack_size(Some(jit_stack_size));
+        }
+        if self.unicode() {
+            builder.utf(true).ucp(true);
+            let explicit_encoding = self.encoding()?.has_explicit_encoding();
+            if explicit_encoding {
+                // SAFETY: If an encoding was specified, then we're guaranteed
+                // to get valid UTF-8, so we can disable PCRE2's UTF checking.
+                // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
+                unsafe {
+                    builder.disable_utf_check();
+                }
+            }
+        }
+        if self.is_present("multiline") {
+            builder.dotall(self.is_present("multiline-dotall"));
+        }
+        if self.is_present("crlf") {
+            builder.crlf(true);
+        }
+        let alternation = patterns.join("|");
+        let matcher = builder.build(&alternation)?;
+        Ok(matcher)
+    }
+
+    /// Build a JSON printer that writes results to the given writer.
+    ///
+    /// Pretty printing and "always begin/end" are both turned off. The match
+    /// limit comes from `max_count`, and an error is returned if that limit
+    /// cannot be determined.
+    fn printer_json<W: io::Write>(&self, wtr: W) -> Result<JSON<W>> {
+        let max_matches = self.max_count()?;
+        let mut json = JSONBuilder::new();
+        json.pretty(false).max_matches(max_matches).always_begin_end(false);
+        Ok(json.build(wtr))
+    }
+
+    /// Build a Standard printer that writes results to the given writer.
+    ///
+    /// The given paths are used to configure aspects of the printer.
+    ///
+    /// If `separator_search` is true, then the returned printer will assume
+    /// the responsibility of printing a separator between each set of
+    /// search results, when appropriate (e.g., when contexts are enabled).
+    /// When it's set to false, the caller is responsible for handling
+    /// separators.
+    ///
+    /// In practice, we want the printer to handle it in the single threaded
+    /// case but not in the multi-threaded case.
+    ///
+    /// An error is returned if any of the settings read here cannot be
+    /// determined.
+    fn printer_standard<W: WriteColor>(
+        &self,
+        paths: &[PathBuf],
+        wtr: W,
+        separator_search: bool,
+    ) -> Result<Standard<W>> {
+        let mut builder = StandardBuilder::new();
+        // Colors, statistics and overall layout.
+        builder
+            .color_specs(self.color_specs()?)
+            .stats(self.stats())
+            .heading(self.heading())
+            .path(self.with_filename(paths));
+        // What gets shown for each match.
+        builder
+            .only_matching(self.is_present("only-matching"))
+            .per_match(self.is_present("vimgrep"))
+            .replacement(self.replacement());
+        // Limits, followed by the per-line decorations.
+        builder
+            .max_columns(self.max_columns()?)
+            .max_columns_preview(self.max_columns_preview())
+            .max_matches(self.max_count()?)
+            .column(self.column())
+            .byte_offset(self.is_present("byte-offset"))
+            .trim_ascii(self.is_present("trim"));
+        // Separators. The search separator starts out disabled and is only
+        // enabled below when the caller hands that job to the printer.
+        builder
+            .separator_search(None)
+            .separator_context(self.context_separator())
+            .separator_field_match(b":".to_vec())
+            .separator_field_context(b"-".to_vec())
+            .separator_path(self.path_separator()?)
+            .path_terminator(self.path_terminator());
+        if separator_search {
+            builder.separator_search(self.file_separator()?);
+        }
+        Ok(builder.build(wtr))
+    }
+
+    /// Build a Summary printer that writes results to the given writer.
+    ///
+    /// The given paths are used to configure aspects of the printer.
+    ///
+    /// This panics if `summary_kind` yields no value, which is expected only
+    /// when the output format is not `OutputKind::Summary`.
+    fn printer_summary<W: WriteColor>(
+        &self,
+        paths: &[PathBuf],
+        wtr: W,
+    ) -> Result<Summary<W>> {
+        let mut builder = SummaryBuilder::new();
+        // The kind of summary, then colors, statistics and paths.
+        builder
+            .kind(self.summary_kind().expect("summary format"))
+            .color_specs(self.color_specs()?)
+            .stats(self.stats())
+            .path(self.with_filename(paths));
+        // Limits and filtering.
+        builder
+            .max_matches(self.max_count()?)
+            .exclude_zero(!self.is_present("include-zero"));
+        // Separators.
+        builder
+            .separator_field(b":".to_vec())
+            .separator_path(self.path_separator()?)
+            .path_terminator(self.path_terminator());
+        Ok(builder.build(wtr))
+    }
+
+    /// Build a searcher from the command line parameters.
+    ///
+    /// The line terminator is CRLF with the `crlf` flag, NUL with the
+    /// `null-data` flag (when `crlf` is absent) and a line feed otherwise.
+    /// An error is returned if the context sizes or the encoding cannot be
+    /// determined.
+    fn searcher(&self, paths: &[PathBuf]) -> Result<Searcher> {
+        let (before, after) = self.contexts()?;
+        let terminator = if self.is_present("crlf") {
+            LineTerminator::crlf()
+        } else if self.is_present("null-data") {
+            LineTerminator::byte(b'\x00')
+        } else {
+            LineTerminator::byte(b'\n')
+        };
+
+        let mut builder = SearcherBuilder::new();
+        builder
+            .line_terminator(terminator)
+            .invert_match(self.is_present("invert-match"))
+            .line_number(self.line_number(paths))
+            .multi_line(self.is_present("multiline"));
+        builder
+            .before_context(before)
+            .after_context(after)
+            .passthru(self.is_present("passthru"))
+            .memory_map(self.mmap_choice(paths));
+
+        match self.encoding()? {
+            // Nothing to do here: this is the searcher's default.
+            EncodingMode::Auto => {}
+            EncodingMode::Disabled => {
+                builder.bom_sniffing(false);
+            }
+            EncodingMode::Some(enc) => {
+                builder.encoding(Some(enc));
+            }
+        }
+        Ok(builder.build())
+    }
+
+    /// Return a builder for recursively traversing a directory while
+    /// respecting ignore rules.
+    ///
+    /// `paths` must be non-empty: the first path seeds the builder and the
+    /// rest are added to it. This panics if `paths` is empty.
+    ///
+    /// If there was a problem parsing the CLI arguments necessary for
+    /// constructing the builder, or if the selected sort criteria fails its
+    /// check, then this returns an error. Problems with individual ignore
+    /// files are only reported via `ignore_message!`.
+    fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> {
+        let mut builder = WalkBuilder::new(&paths[0]);
+        for path in paths.iter().skip(1) {
+            builder.add(path);
+        }
+        for path in self.ignore_paths() {
+            if let Some(err) = builder.add_ignore(path) {
+                ignore_message!("{}", err);
+            }
+        }
+        // Traversal limits and scope.
+        builder
+            .max_depth(self.usize_of("max-depth")?)
+            .follow_links(self.is_present("follow"))
+            .max_filesize(self.max_file_size()?)
+            .threads(self.threads()?)
+            .same_file_system(self.is_present("one-file-system"))
+            .skip_stdout(!self.is_present("files"));
+        // File selection.
+        builder
+            .overrides(self.overrides()?)
+            .types(self.types()?)
+            .hidden(!self.hidden());
+        // Ignore rules. The three git sources are all switched off when VCS
+        // ignore files are disabled.
+        let respect_vcs = !self.no_ignore_vcs();
+        builder
+            .parents(!self.no_ignore_parent())
+            .ignore(!self.no_ignore_dot())
+            .git_global(respect_vcs && !self.no_ignore_global())
+            .git_ignore(respect_vcs)
+            .git_exclude(respect_vcs && !self.no_ignore_exclude())
+            .require_git(!self.is_present("no-require-git"))
+            .ignore_case_insensitive(self.ignore_file_case_insensitive());
+        if !self.no_ignore() {
+            builder.add_custom_ignore_filename(".rgignore");
+        }
+        let sortby = self.sort_by()?;
+        sortby.check()?;
+        sortby.configure_walk_builder(&mut builder);
+        Ok(builder)
+    }
+}
+
+/// Mid level routines for converting command line arguments into various types
+/// of data structures.
+///
+/// Methods are sorted alphabetically.
+impl ArgMatches {
+    /// Returns the form of binary detection to perform on files that are
+    /// implicitly searched via recursive directory traversal.
+    ///
+    /// Detection is disabled by the `text` and `null-data` flags. Otherwise
+    /// the `binary` flag, or an unrestricted count of at least 3, selects
+    /// the "convert" mode. The default is the "quit" mode.
+    fn binary_detection_implicit(&self) -> BinaryDetection {
+        let disabled = self.is_present("text") || self.is_present("null-data");
+        let convert =
+            self.is_present("binary") || self.unrestricted_count() >= 3;
+        match (disabled, convert) {
+            (true, _) => BinaryDetection::none(),
+            (false, true) => BinaryDetection::convert(b'\x00'),
+            (false, false) => BinaryDetection::quit(b'\x00'),
+        }
+    }
+
+    /// Returns the form of binary detection to perform on files that are
+    /// explicitly searched via the user invoking ripgrep on a particular
+    /// file or files or stdin.
+    ///
+    /// In general, this should never be BinaryDetection::quit, since that acts
+    /// as a filter (but quitting immediately once a NUL byte is seen), and we
+    /// should never filter out files that the user wants to explicitly search.
+    /// Detection is disabled entirely by the `text` and `null-data` flags.
+    fn binary_detection_explicit(&self) -> BinaryDetection {
+        let enabled =
+            !self.is_present("text") && !self.is_present("null-data");
+        if enabled {
+            BinaryDetection::convert(b'\x00')
+        } else {
+            BinaryDetection::none()
+        }
+    }
+
+    /// Returns true if the command line configuration implies that a match
+    /// can never be shown.
+    ///
+    /// That is the case when there are no patterns, or when the maximum
+    /// match count is exactly zero. A failure to determine the maximum count
+    /// is not treated as "can never match".
+    fn can_never_match(&self, patterns: &[String]) -> bool {
+        if patterns.is_empty() {
+            return true;
+        }
+        match self.max_count() {
+            Ok(Some(0)) => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if case should be ignored.
+    ///
+    /// If --case-sensitive is present, then case is never ignored, even if
+    /// --ignore-case is present.
+    fn case_insensitive(&self) -> bool {
+        if self.is_present("case-sensitive") {
+            return false;
+        }
+        self.is_present("ignore-case")
+    }
+
+    /// Returns true if and only if smart case has been enabled.
+    ///
+    /// If either --ignore-case or --case-sensitive are present, then smart
+    /// case is disabled.
+    fn case_smart(&self) -> bool {
+        let overridden = self.is_present("ignore-case")
+            || self.is_present("case-sensitive");
+        self.is_present("smart-case") && !overridden
+    }
+
+    /// Returns the user's color choice based on command line parameters and
+    /// environment.
+    ///
+    /// A missing `color` value is treated as "auto". In "auto" mode, color
+    /// is only considered when stdout is a tty or the `pretty` flag is
+    /// present. Any value other than "always", "ansi" or "auto" disables
+    /// color.
+    fn color_choice(&self) -> ColorChoice {
+        let preference = self
+            .value_of_lossy("color")
+            .unwrap_or_else(|| "auto".to_string());
+        match preference.as_str() {
+            "always" => ColorChoice::Always,
+            "ansi" => ColorChoice::AlwaysAnsi,
+            "auto" if cli::is_tty_stdout() || self.is_present("pretty") => {
+                ColorChoice::Auto
+            }
+            _ => ColorChoice::Never,
+        }
+    }
+
+ /// Returns the color specifications given by the user on the CLI.
+ ///
+ /// If there was a problem parsing any of the provided specs, then an error
+ /// is returned.
+ fn color_specs(&self) -> Result<ColorSpecs> {
+ // Start with a default set of color specs.
+ let mut specs = default