diff options
Diffstat (limited to 'crates/core/args.rs')
-rw-r--r-- | crates/core/args.rs | 1805 |
1 files changed, 1805 insertions, 0 deletions
diff --git a/crates/core/args.rs b/crates/core/args.rs new file mode 100644 index 00000000..989f95c2 --- /dev/null +++ b/crates/core/args.rs @@ -0,0 +1,1805 @@ +use std::cmp; +use std::env; +use std::ffi::{OsStr, OsString}; +use std::fs; +use std::io::{self, Write}; +use std::path::{Path, PathBuf}; +use std::process; +use std::sync::Arc; +use std::time::SystemTime; + +use clap; +use grep::cli; +use grep::matcher::LineTerminator; +#[cfg(feature = "pcre2")] +use grep::pcre2::{ + RegexMatcher as PCRE2RegexMatcher, + RegexMatcherBuilder as PCRE2RegexMatcherBuilder, +}; +use grep::printer::{ + default_color_specs, ColorSpecs, JSONBuilder, Standard, StandardBuilder, + Stats, Summary, SummaryBuilder, SummaryKind, JSON, +}; +use grep::regex::{ + RegexMatcher as RustRegexMatcher, + RegexMatcherBuilder as RustRegexMatcherBuilder, +}; +use grep::searcher::{ + BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, +}; +use ignore::overrides::{Override, OverrideBuilder}; +use ignore::types::{FileTypeDef, Types, TypesBuilder}; +use ignore::{Walk, WalkBuilder, WalkParallel}; +use log; +use num_cpus; +use regex; +use termcolor::{BufferWriter, ColorChoice, WriteColor}; + +use crate::app; +use crate::config; +use crate::logger::Logger; +use crate::messages::{set_ignore_messages, set_messages}; +use crate::path_printer::{PathPrinter, PathPrinterBuilder}; +use crate::search::{ + PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder, +}; +use crate::subject::SubjectBuilder; +use crate::Result; + +/// The command that ripgrep should execute based on the command line +/// configuration. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Command { + /// Search using exactly one thread. + Search, + /// Search using possibly many threads. + SearchParallel, + /// The command line parameters suggest that a search should occur, but + /// ripgrep knows that a match can never be found (e.g., no given patterns + /// or --max-count=0). + SearchNever, + /// Show the files that would be searched, but don't actually search them, + /// and use exactly one thread. + Files, + /// Show the files that would be searched, but don't actually search them, + /// and perform directory traversal using possibly many threads. + FilesParallel, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. + Types, + /// Print the version of PCRE2 in use. + PCRE2Version, +} + +impl Command { + /// Returns true if and only if this command requires executing a search. + fn is_search(&self) -> bool { + use self::Command::*; + + match *self { + Search | SearchParallel => true, + SearchNever | Files | FilesParallel | Types | PCRE2Version => { + false + } + } + } +} + +/// The primary configuration object used throughout ripgrep. It provides a +/// high-level convenient interface to the provided command line arguments. +/// +/// An `Args` object is cheap to clone and can be used from multiple threads +/// simultaneously. +#[derive(Clone, Debug)] +pub struct Args(Arc<ArgsImp>); + +#[derive(Clone, Debug)] +struct ArgsImp { + /// Mid-to-low level routines for extracting CLI arguments. + matches: ArgMatches, + /// The patterns provided at the command line and/or via the -f/--file + /// flag. This may be empty. + patterns: Vec<String>, + /// A matcher built from the patterns. + /// + /// It's important that this is only built once, since building this goes + /// through regex compilation and various types of analyses. That is, if + /// you need many of theses (one per thread, for example), it is better to + /// build it once and then clone it. + matcher: PatternMatcher, + /// The paths provided at the command line. This is guaranteed to be + /// non-empty. (If no paths are provided, then a default path is created.) + paths: Vec<PathBuf>, + /// Returns true if and only if `paths` had to be populated with a single + /// default path. + using_default_path: bool, +} + +impl Args { + /// Parse the command line arguments for this process. + /// + /// If a CLI usage error occurred, then exit the process and print a usage + /// or error message. Similarly, if the user requested the version of + /// ripgrep, then print the version and exit. + /// + /// Also, initialize a global logger. + pub fn parse() -> Result<Args> { + // We parse the args given on CLI. This does not include args from + // the config. We use the CLI args as an initial configuration while + // trying to parse config files. If a config file exists and has + // arguments, then we re-parse argv, otherwise we just use the matches + // we have here. + let early_matches = ArgMatches::new(clap_matches(env::args_os())?); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); + + if let Err(err) = Logger::init() { + return Err(format!("failed to initialize logger: {}", err).into()); + } + if early_matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if early_matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + + let matches = early_matches.reconfigure()?; + // The logging level may have changed if we brought in additional + // arguments from a configuration file, so recheck it and set the log + // level as appropriate. + if matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); + matches.to_args() + } + + /// Return direct access to command line arguments. + fn matches(&self) -> &ArgMatches { + &self.0.matches + } + + /// Return the patterns found in the command line arguments. This includes + /// patterns read via the -f/--file flags. + fn patterns(&self) -> &[String] { + &self.0.patterns + } + + /// Return the matcher builder from the patterns. + fn matcher(&self) -> &PatternMatcher { + &self.0.matcher + } + + /// Return the paths found in the command line arguments. This is + /// guaranteed to be non-empty. In the case where no explicit arguments are + /// provided, a single default path is provided automatically. + fn paths(&self) -> &[PathBuf] { + &self.0.paths + } + + /// Returns true if and only if `paths` had to be populated with a default + /// path, which occurs only when no paths were given as command line + /// arguments. + fn using_default_path(&self) -> bool { + self.0.using_default_path + } + + /// Return the printer that should be used for formatting the output of + /// search results. + /// + /// The returned printer will write results to the given writer. + fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> { + match self.matches().output_kind() { + OutputKind::Standard => { + let separator_search = self.command()? == Command::Search; + self.matches() + .printer_standard(self.paths(), wtr, separator_search) + .map(Printer::Standard) + } + OutputKind::Summary => self + .matches() + .printer_summary(self.paths(), wtr) + .map(Printer::Summary), + OutputKind::JSON => { + self.matches().printer_json(wtr).map(Printer::JSON) + } + } + } +} + +/// High level public routines for building data structures used by ripgrep +/// from command line arguments. +impl Args { + /// Create a new buffer writer for multi-threaded printing with color + /// support. + pub fn buffer_writer(&self) -> Result<BufferWriter> { + let mut wtr = BufferWriter::stdout(self.matches().color_choice()); + wtr.separator(self.matches().file_separator()?); + Ok(wtr) + } + + /// Return the high-level command that ripgrep should run. + pub fn command(&self) -> Result<Command> { + let is_one_search = self.matches().is_one_search(self.paths()); + let threads = self.matches().threads()?; + let one_thread = is_one_search || threads == 1; + + Ok(if self.matches().is_present("pcre2-version") { + Command::PCRE2Version + } else if self.matches().is_present("type-list") { + Command::Types + } else if self.matches().is_present("files") { + if one_thread { + Command::Files + } else { + Command::FilesParallel + } + } else if self.matches().can_never_match(self.patterns()) { + Command::SearchNever + } else if one_thread { + Command::Search + } else { + Command::SearchParallel + }) + } + + /// Builder a path printer that can be used for printing just file paths, + /// with optional color support. + /// + /// The printer will print paths to the given writer. + pub fn path_printer<W: WriteColor>( + &self, + wtr: W, + ) -> Result<PathPrinter<W>> { + let mut builder = PathPrinterBuilder::new(); + builder + .color_specs(self.matches().color_specs()?) + .separator(self.matches().path_separator()?) + .terminator(self.matches().path_terminator().unwrap_or(b'\n')); + Ok(builder.build(wtr)) + } + + /// Returns true if and only if ripgrep should be "quiet." + pub fn quiet(&self) -> bool { + self.matches().is_present("quiet") + } + + /// Returns true if and only if the search should quit after finding the + /// first match. + pub fn quit_after_match(&self) -> Result<bool> { + Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) + } + + /// Build a worker for executing searches. + /// + /// Search results are written to the given writer. + pub fn search_worker<W: WriteColor>( + &self, + wtr: W, + ) -> Result<SearchWorker<W>> { + let matches = self.matches(); + let matcher = self.matcher().clone(); + let printer = self.printer(wtr)?; + let searcher = matches.searcher(self.paths())?; + let mut builder = SearchWorkerBuilder::new(); + builder + .json_stats(matches.is_present("json")) + .preprocessor(matches.preprocessor()) + .preprocessor_globs(matches.preprocessor_globs()?) + .search_zip(matches.is_present("search-zip")) + .binary_detection_implicit(matches.binary_detection_implicit()) + .binary_detection_explicit(matches.binary_detection_explicit()); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Returns a zero value for tracking statistics if and only if it has been + /// requested. + /// + /// When this returns a `Stats` value, then it is guaranteed that the + /// search worker will be configured to track statistics as well. + pub fn stats(&self) -> Result<Option<Stats>> { + Ok(if self.command()?.is_search() && self.matches().stats() { + Some(Stats::new()) + } else { + None + }) + } + + /// Return a builder for constructing subjects. A subject represents a + /// single unit of something to search. Typically, this corresponds to a + /// file or a stream such as stdin. + pub fn subject_builder(&self) -> SubjectBuilder { + let mut builder = SubjectBuilder::new(); + builder.strip_dot_prefix(self.using_default_path()); + builder + } + + /// Execute the given function with a writer to stdout that enables color + /// support based on the command line configuration. + pub fn stdout(&self) -> cli::StandardStream { + let color = self.matches().color_choice(); + if self.matches().is_present("line-buffered") { + cli::stdout_buffered_line(color) + } else if self.matches().is_present("block-buffered") { + cli::stdout_buffered_block(color) + } else { + cli::stdout(color) + } + } + + /// Return the type definitions compiled into ripgrep. + /// + /// If there was a problem reading and parsing the type definitions, then + /// this returns an error. + pub fn type_defs(&self) -> Result<Vec<FileTypeDef>> { + Ok(self.matches().types()?.definitions().to_vec()) + } + + /// Return a walker that never uses additional threads. + pub fn walker(&self) -> Result<Walk> { + Ok(self.matches().walker_builder(self.paths())?.build()) + } + + /// Return a walker that never uses additional threads. + pub fn walker_parallel(&self) -> Result<WalkParallel> { + Ok(self.matches().walker_builder(self.paths())?.build_parallel()) + } +} + +/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to +/// the parsed arguments. +#[derive(Clone, Debug)] +struct ArgMatches(clap::ArgMatches<'static>); + +/// The output format. Generally, this corresponds to the printer that ripgrep +/// uses to show search results. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum OutputKind { + /// Classic grep-like or ack-like format. + Standard, + /// Show matching files and possibly the number of matches in each file. + Summary, + /// Emit match information in the JSON Lines format. + JSON, +} + +/// The sort criteria, if present. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +struct SortBy { + /// Whether to reverse the sort criteria (i.e., descending order). + reverse: bool, + /// The actual sorting criteria. + kind: SortByKind, +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum SortByKind { + /// No sorting at all. + None, + /// Sort by path. + Path, + /// Sort by last modified time. + LastModified, + /// Sort by last accessed time. + LastAccessed, + /// Sort by creation time. + Created, +} + +impl SortBy { + fn asc(kind: SortByKind) -> SortBy { + SortBy { reverse: false, kind: kind } + } + + fn desc(kind: SortByKind) -> SortBy { + SortBy { reverse: true, kind: kind } + } + + fn none() -> SortBy { + SortBy::asc(SortByKind::None) + } + + /// Try to check that the sorting criteria selected is actually supported. + /// If it isn't, then an error is returned. + fn check(&self) -> Result<()> { + match self.kind { + SortByKind::None | SortByKind::Path => {} + SortByKind::LastModified => { + env::current_exe()?.metadata()?.modified()?; + } + SortByKind::LastAccessed => { + env::current_exe()?.metadata()?.accessed()?; + } + SortByKind::Created => { + env::current_exe()?.metadata()?.created()?; + } + } + Ok(()) + } + + fn configure_walk_builder(self, builder: &mut WalkBuilder) { + // This isn't entirely optimal. In particular, we will wind up issuing + // a stat for many files redundantly. Aside from having potentially + // inconsistent results with respect to sorting, this is also slow. + // We could fix this here at the expense of memory by caching stat + // calls. A better fix would be to find a way to push this down into + // directory traversal itself, but that's a somewhat nasty change. + match self.kind { + SortByKind::None => {} + SortByKind::Path => { + if self.reverse { + builder.sort_by_file_name(|a, b| a.cmp(b).reverse()); + } else { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + } + SortByKind::LastModified => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time(a, b, self.reverse, |md| { + md.modified() + }) + }); + } + SortByKind::LastAccessed => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time(a, b, self.reverse, |md| { + md.accessed() + }) + }); + } + SortByKind::Created => { + builder.sort_by_file_path(move |a, b| { + sort_by_metadata_time(a, b, self.reverse, |md| { + md.created() + }) + }); + } + } + } +} + +impl SortByKind { + fn new(kind: &str) -> SortByKind { + match kind { + "none" => SortByKind::None, + "path" => SortByKind::Path, + "modified" => SortByKind::LastModified, + "accessed" => SortByKind::LastAccessed, + "created" => SortByKind::Created, + _ => SortByKind::None, + } + } +} + +/// Encoding mode the searcher will use. +#[derive(Clone, Debug)] +enum EncodingMode { + /// Use an explicit encoding forcefully, but let BOM sniffing override it. + Some(Encoding), + /// Use only BOM sniffing to auto-detect an encoding. + Auto, + /// Use no explicit encoding and disable all BOM sniffing. This will + /// always result in searching the raw bytes, regardless of their + /// true encoding. + Disabled, +} + +impl EncodingMode { + /// Checks if an explicit encoding has been set. Returns false for + /// automatic BOM sniffing and no sniffing. + /// + /// This is only used to determine whether PCRE2 needs to have its own + /// UTF-8 checking enabled. If we have an explicit encoding set, then + /// we're always guaranteed to get UTF-8, so we can disable PCRE2's check. + /// Otherwise, we have no such guarantee, and must enable PCRE2' UTF-8 + /// check. + #[cfg(feature = "pcre2")] + fn has_explicit_encoding(&self) -> bool { + match self { + EncodingMode::Some(_) => true, + _ => false, + } + } +} + +impl ArgMatches { + /// Create an ArgMatches from clap's parse result. + fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { + ArgMatches(clap_matches) + } + + /// Run clap and return the matches using a config file if present. If clap + /// determines a problem with the user provided arguments (or if --help or + /// --version are given), then an error/usage/version will be printed and + /// the process will exit. + /// + /// If there are no additional arguments from the environment (e.g., a + /// config file), then the given matches are returned as is. + fn reconfigure(self) -> Result<ArgMatches> { + // If the end user says no config, then respect it. + if self.is_present("no-config") { + log::debug!( + "not reading config files because --no-config is present" + ); + return Ok(self); + } + // If the user wants ripgrep to use a config file, then parse args + // from that first. + let mut args = config::args(); + if args.is_empty() { + return Ok(self); + } + let mut cliargs = env::args_os(); + if let Some(bin) = cliargs.next() { + args.insert(0, bin); + } + args.extend(cliargs); + log::debug!("final argv: {:?}", args); + Ok(ArgMatches(clap_matches(args)?)) + } + + /// Convert the result of parsing CLI arguments into ripgrep's higher level + /// configuration structure. + fn to_args(self) -> Result<Args> { + // We compute these once since they could be large. + let patterns = self.patterns()?; + let matcher = self.matcher(&patterns)?; + let mut paths = self.paths(); + let using_default_path = if paths.is_empty() { + paths.push(self.path_default()); + true + } else { + false + }; + Ok(Args(Arc::new(ArgsImp { + matches: self, + patterns: patterns, + matcher: matcher, + paths: paths, + using_default_path: using_default_path, + }))) + } +} + +/// High level routines for converting command line arguments into various +/// data structures used by ripgrep. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(feature = "pcre2")] + fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> { + if self.is_present("pcre2") { + let matcher = self.matcher_pcre2(patterns)?; + Ok(PatternMatcher::PCRE2(matcher)) + } else if self.is_present("auto-hybrid-regex") { + let rust_err = match self.matcher_rust(patterns) { + Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)), + Err(err) => err, + }; + log::debug!( + "error building Rust regex in hybrid mode:\n{}", + rust_err, + ); + let pcre_err = match self.matcher_pcre2(patterns) { + Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)), + Err(err) => err, + }; + Err(From::from(format!( + "regex could not be compiled with either the default regex \ + engine or with PCRE2.\n\n\ + default regex engine error:\n{}\n{}\n{}\n\n\ + PCRE2 regex engine error:\n{}", + "~".repeat(79), + rust_err, + "~".repeat(79), + pcre_err, + ))) + } else { + let matcher = match self.matcher_rust(patterns) { + Ok(matcher) => matcher, + Err(err) => { + return Err(From::from(suggest_pcre2(err.to_string()))); + } + }; + Ok(PatternMatcher::RustRegex(matcher)) + } + } + + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(not(feature = "pcre2"))] + fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> { + if self.is_present("pcre2") { + return Err(From::from( + "PCRE2 is not available in this build of ripgrep", + )); + } + let matcher = self.matcher_rust(patterns)?; + Ok(PatternMatcher::RustRegex(matcher)) + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self, patterns: &[String]) -> Result<RustRegexMatcher> { + let mut builder = RustRegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .case_insensitive(self.case_insensitive()) + .multi_line(true) + .unicode(self.unicode()) + .octal(false) + .word(self.is_present("word-regexp")); + if self.is_present("multiline") { + builder.dot_matches_new_line(self.is_present("multiline-dotall")); + if self.is_present("crlf") { + builder.crlf(true).line_terminator(None); + } + } else { + builder.line_terminator(Some(b'\n')).dot_matches_new_line(false); + if self.is_present("crlf") { + builder.crlf(true); + } + // We don't need to set this in multiline mode since mulitline + // matchers don't use optimizations related to line terminators. + // Moreover, a mulitline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. + if self.is_present("null-data") { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit()? { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit()? { + builder.dfa_size_limit(limit); + } + let res = if self.is_present("fixed-strings") { + builder.build_literals(patterns) + } else { + builder.build(&patterns.join("|")) + }; + match res { + Ok(m) => Ok(m), + Err(err) => Err(From::from(suggest_multiline(err.to_string()))), + } + } + + /// Build a matcher using PCRE2. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + #[cfg(feature = "pcre2")] + fn matcher_pcre2(&self, patterns: &[String]) -> Result<PCRE2RegexMatcher> { + let mut builder = PCRE2RegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .caseless(self.case_insensitive()) + .multi_line(true) + .word(self.is_present("word-regexp")); + // For whatever reason, the JIT craps out during regex compilation with + // a "no more memory" error on 32 bit systems. So don't use it there. + if cfg!(target_pointer_width = "64") { + builder + .jit_if_available(true) + // The PCRE2 docs say that 32KB is the default, and that 1MB + // should be big enough for anything. But let's crank it to + // 10MB. + .max_jit_stack_size(Some(10 * (1 << 20))); + } + if self.unicode() { + builder.utf(true).ucp(true); + if self.encoding()?.has_explicit_encoding() { + // SAFETY: If an encoding was specified, then we're guaranteed + // to get valid UTF-8, so we can disable PCRE2's UTF checking. + // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.) + unsafe { + builder.disable_utf_check(); + } + } + } + if self.is_present("multiline") { + builder.dotall(self.is_present("multiline-dotall")); + } + if self.is_present("crlf") { + builder.crlf(true); + } + Ok(builder.build(&patterns.join("|"))?) + } + + /// Build a JSON printer that writes results to the given writer. + fn printer_json<W: io::Write>(&self, wtr: W) -> Result<JSON<W>> { + let mut builder = JSONBuilder::new(); + builder + .pretty(false) + .max_matches(self.max_count()?) + .always_begin_end(false); + Ok(builder.build(wtr)) + } + + /// Build a Standard printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. + /// + /// If `separator_search` is true, then the returned printer will assume + /// the responsibility of printing a separator between each set of + /// search results, when appropriate (e.g., when contexts are enabled). + /// When it's set to false, the caller is responsible for handling + /// separators. + /// + /// In practice, we want the printer to handle it in the single threaded + /// case but not in the multi-threaded case. + fn printer_standard<W: WriteColor>( + &self, + paths: &[PathBuf], + wtr: W, + separator_search: bool, + ) -> Result<Standard<W>> { + let mut builder = StandardBuilder::new(); + builder + .color_specs(self.color_specs()?) + .stats(self.stats()) + .heading(self.heading()) + .path(self.with_filename(paths)) + .only_matching(self.is_present("only-matching")) + .per_match(self.is_present("vimgrep")) + .replacement(self.replacement()) + .max_columns(self.max_columns()?) + .max_columns_preview(self.max_columns_preview()) + .max_matches(self.max_count()?) + .column(self.column()) + .byte_offset(self.is_present("byte-offset")) + .trim_ascii(self.is_present("trim")) + .separator_search(None) + .separator_context(self.context_separator()) + .separator_field_match(b":".to_vec()) + .separator_field_context(b"-".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + if separator_search { + builder.separator_search(self.file_separator()?); + } + Ok(builder.build(wtr)) + } + + /// Build a Summary printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. + /// + /// This panics if the output format is not `OutputKind::Summary`. + fn printer_summary<W: WriteColor>( + &self, + paths: &[PathBuf], + wtr: W, + ) -> Result<Summary<W>> { + let mut builder = SummaryBuilder::new(); + builder + .kind(self.summary_kind().expect("summary format")) + .color_specs(self.color_specs()?) + .stats(self.stats()) + .path(self.with_filename(paths)) + .max_matches(self.max_count()?) + .exclude_zero(!self.is_present("include-zero")) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + Ok(builder.build(wtr)) + } + + /// Build a searcher from the command line parameters. + fn searcher(&self, paths: &[PathBuf]) -> Result<Searcher> { + let (ctx_before, ctx_after) = self.contexts()?; + let line_term = if self.is_present("crlf") { + LineTerminator::crlf() + } else if self.is_present("null-data") { + LineTerminator::byte(b'\x00') + } else { + LineTerminator::byte(b'\n') + }; + let mut builder = SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.is_present("invert-match")) + .line_number(self.line_number(paths)) + .multi_line(self.is_present("multiline")) + .before_context(ctx_before) + .after_context(ctx_after) + .passthru(self.is_present("passthru")) + .memory_map(self.mmap_choice(paths)); + match self.encoding()? { + EncodingMode::Some(enc) => { + builder.encoding(Some(enc)); + } + EncodingMode::Auto => {} // default for the searcher + EncodingMode::Disabled => { + builder.bom_sniffing(false); + } + } + Ok(builder.build()) + } + + /// Return a builder for recursively traversing a directory while + /// respecting ignore rules. + /// + /// If there was a problem parsing the CLI arguments necessary for + /// constructing the builder, then this returns an error. + fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> { + let mut builder = WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + builder.add(path); + } + for path in self.ignore_paths() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{}", err); + } + } + builder + .max_depth(self.usize_of("max-depth")?) + .follow_links(self.is_present("follow")) + .max_filesize(self.max_file_size()?) + .threads(self.threads()?) + .same_file_system(self.is_present("one-file-system")) + .skip_stdout(!self.is_present("files")) + .overrides(self.overrides()?) + .types(self.types()?) + .hidden(!self.hidden()) + .parents(!self.no_ignore_parent()) + .ignore(!self.no_ignore_dot()) + .git_global(!self.no_ignore_vcs() && !self.no_ignore_global()) + .git_ignore(!self.no_ignore_vcs()) + .git_exclude(!self.no_ignore_vcs() && !self.no_ignore_exclude()) + .require_git(!self.is_present("no-require-git")) + .ignore_case_insensitive(self.ignore_file_case_insensitive()); + if !self.no_ignore() { + builder.add_custom_ignore_filename(".rgignore"); + } + let sortby = self.sort_by()?; + sortby.check()?; + sortby.configure_walk_builder(&mut builder); + Ok(builder) + } +} + +/// Mid level routines for converting command line arguments into various types +/// of data structures. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Returns the form of binary detection to perform on files that are + /// implicitly searched via recursive directory traversal. + fn binary_detection_implicit(&self) -> BinaryDetection { + let none = self.is_present("text") || self.is_present("null-data"); + let convert = + self.is_present("binary") || self.unrestricted_count() >= 3; + if none { + BinaryDetection::none() + } else if convert { + BinaryDetection::convert(b'\x00') + } else { + BinaryDetection::quit(b'\x00') + } + } + + /// Returns the form of binary detection to perform on files that are + /// explicitly searched via the user invoking ripgrep on a particular + /// file or files or stdin. + /// + /// In general, this should never be BinaryDetection::quit, since that acts + /// as a filter (but quitting immediately once a NUL byte is seen), and we + /// should never filter out files that the user wants to explicitly search. + fn binary_detection_explicit(&self) -> BinaryDetection { + let none = self.is_present("text") || self.is_present("null-data"); + if none { + BinaryDetection::none() + } else { + BinaryDetection::convert(b'\x00') + } + } + + /// Returns true if the command line configuration implies that a match + /// can never be shown. + fn can_never_match(&self, patterns: &[String]) -> bool { + patterns.is_empty() || self.max_count().ok() == Some(Some(0)) + } + + /// Returns true if and only if case should be ignore. + /// + /// If --case-sensitive is present, then case is never ignored, even if + /// --ignore-case is present. + fn case_insensitive(&self) -> bool { + self.is_present("ignore-case") && !self.is_present("case-sensitive") + } + + /// Returns true if and only if smart case has been enabled. + /// + /// If either --ignore-case of --case-sensitive are present, then smart + /// case is disabled. + fn case_smart(&self) -> bool { + self.is_present("smart-case") + && !self.is_present("ignore-case") + && !self.is_present("case-sensitive") + } + + /// Returns the user's color choice based on command line parameters and + /// environment. + fn color_choice(&self) -> ColorChoice { + let preference = match self.value_of_lossy("color") { + None => "auto".to_string(), + Some(v) => v, + }; + if preference == "always" { + ColorChoice::Always + } else if preference == "ansi" { + ColorChoice::AlwaysAnsi + } else if preference == "auto" { + if cli::is_tty_stdout() || self.is_present("pretty") { + ColorChoice::Auto + } else { + ColorChoice::Never + } + } else { + ColorChoice::Never + } + } + + /// Returns the color specifications given by the user on the CLI. + /// + /// If the was a problem parsing any of the provided specs, then an error + /// is returned. + fn color_specs(&self) -> Result<ColorSpecs> { + // Start with a default set of color specs. + let mut specs = default |