diff options
author | Marc Tiehuis <marctiehuis@gmail.com> | 2017-04-02 09:55:58 +1200 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2017-04-12 18:14:23 -0400 |
commit | 66efbad871620fe2dbe675438fdc8d9922e72826 (patch) | |
tree | f29d2e1b4da5adb8eb6c94d45c5e239b44030e82 /src | |
parent | 1f2a9b03062d14491b04a644409fdb8e62f77711 (diff) |
Add dfa-size-limit and regex-size-limit arguments
Fixes #362.
Diffstat (limited to 'src')
-rw-r--r-- | src/app.rs | 22 | ||||
-rw-r--r-- | src/args.rs | 96 |
2 files changed, 98 insertions, 20 deletions
@@ -41,7 +41,9 @@ OPTIONS: /// in a `build.rs` script to build shell completion files. pub fn app() -> App<'static, 'static> { let arg = |name| { - Arg::with_name(name).help(USAGES[name].short).long_help(USAGES[name].long) + Arg::with_name(name) + .help(USAGES[name].short) + .long_help(USAGES[name].long) }; let flag = |name| arg(name).long(name); @@ -53,7 +55,7 @@ pub fn app() -> App<'static, 'static> { .setting(AppSettings::UnifiedHelpMessage) .usage(USAGE) .template(TEMPLATE) - .help_message("Prints help information. Use --help for more details.") + .help_message("Prints help information. Use --help for more details.") // First, set up primary positional/flag arguments. .arg(arg("pattern") .required_unless_one(&[ @@ -114,6 +116,8 @@ pub fn app() -> App<'static, 'static> { .arg(flag("column")) .arg(flag("context-separator") .value_name("SEPARATOR").takes_value(true)) + .arg(flag("dfa-size-limit") + .value_name("NUM+SUFFIX?").takes_value(true)) .arg(flag("debug")) .arg(flag("file").short("f") .value_name("FILE").takes_value(true) @@ -148,6 +152,8 @@ pub fn app() -> App<'static, 'static> { .arg(flag("path-separator").value_name("SEPARATOR").takes_value(true)) .arg(flag("pretty").short("p")) .arg(flag("replace").short("r").value_name("ARG").takes_value(true)) + .arg(flag("regex-size-limit") + .value_name("NUM+SUFFIX?").takes_value(true)) .arg(flag("case-sensitive").short("s")) .arg(flag("smart-case").short("S")) .arg(flag("sort-files")) @@ -326,6 +332,13 @@ lazy_static! { doc!(h, "debug", "Show debug messages.", "Show debug messages. Please use this when filing a bug report."); + doc!(h, "dfa-size-limit", + "The upper size limit of the generated dfa.", + "The upper size limit of the generated dfa. The default limit is \ + 10M. This should only be changed on very large regex inputs \ + where the (slower) fallback regex engine may otherwise be used. \ + \n\nThe argument accepts the same size suffixes as allowed in \ + the 'max-filesize' argument."); doc!(h, "file", "Search for patterns from the given file.", "Search for patterns from the given file, with one pattern per \ @@ -444,6 +457,11 @@ lazy_static! { Note that the replacement by default replaces each match, and \ NOT the entire line. To replace the entire line, you should \ match the entire line."); + doc!(h, "regex-size-limit", + "The upper size limit of the compiled regex.", + "The upper size limit of the compiled regex. The default limit \ + is 10M. \n\nThe argument accepts the same size suffixes as \ + allowed in the 'max-filesize' argument."); doc!(h, "case-sensitive", "Search case sensitively.", "Search case sensitively. This overrides -i/--ignore-case and \ diff --git a/src/args.rs b/src/args.rs index 34f38792..48c4ad56 100644 --- a/src/args.rs +++ b/src/args.rs @@ -771,12 +771,18 @@ impl<'a> ArgMatches<'a> { let casei = self.is_present("ignore-case") && !self.is_present("case-sensitive"); - GrepBuilder::new(&try!(self.pattern())) + let mut gb = GrepBuilder::new(&try!(self.pattern())) .case_smart(smart) .case_insensitive(casei) - .line_terminator(b'\n') - .build() - .map_err(From::from) + .line_terminator(b'\n'); + + if let Some(limit) = try!(self.dfa_size_limit()) { + gb = gb.dfa_size_limit(limit); + } + if let Some(limit) = try!(self.regex_size_limit()) { + gb = gb.size_limit(limit); + } + gb.build().map_err(From::from) } /// Builds the set of glob overrides from the command line flags. @@ -807,31 +813,64 @@ impl<'a> ArgMatches<'a> { btypes.build().map_err(From::from) } - /// Parses the max-filesize argument option into a byte count. - fn max_filesize(&self) -> Result<Option<u64>> { - use regex::Regex; - - let max_filesize = match self.value_of_lossy("max-filesize") { + /// Parses an argument of the form `[0-9]+(KMG)?`. + /// + /// This always returns the result as a type `u64`. This must be converted + /// to the appropriate type by the caller. + fn parse_human_readable_size_arg( + &self, + arg_name: &str, + ) -> Result<Option<u64>> { + let arg_value = match self.value_of_lossy(arg_name) { Some(x) => x, None => return Ok(None) }; + let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap(); + let caps = try!( + re.captures(&arg_value).ok_or_else(|| { + format!("invalid format for {}", arg_name) + })); - let re = Regex::new("^([0-9]+)([KMG])?$").unwrap(); - let caps = try!(re.captures(&max_filesize) - .ok_or("invalid format for max-filesize argument")); - - let value = try!(caps[1].parse::<u64>().map_err(|err|err.to_string())); + let value = try!(caps[1].parse::<u64>()); let suffix = caps.get(2).map(|x| x.as_str()); + let v_10 = value.checked_mul(1024); + let v_20 = v_10.and_then(|x| x.checked_mul(1024)); + let v_30 = v_20.and_then(|x| x.checked_mul(1024)); + + let try_suffix = |x: Option<u64>| { + if x.is_some() { + Ok(x) + } else { + Err(From::from(format!("number too large for {}", arg_name))) + } + }; match suffix { None => Ok(Some(value)), - Some("K") => Ok(Some(value * 1024)), - Some("M") => Ok(Some(value * 1024 * 1024)), - Some("G") => Ok(Some(value * 1024 * 1024 * 1024)), - _ => Err(From::from("invalid suffix for max-filesize argument")) + Some("K") => try_suffix(v_10), + Some("M") => try_suffix(v_20), + Some("G") => try_suffix(v_30), + _ => Err(From::from(format!("invalid suffix for {}", arg_name))) } } + /// Parse the dfa-size-limit argument option into a byte count. + fn dfa_size_limit(&self) -> Result<Option<usize>> { + let r = try!(self.parse_human_readable_size_arg("dfa-size-limit")); + human_readable_to_usize("dfa-size-limit", r) + } + + /// Parse the regex-size-limit argument option into a byte count. + fn regex_size_limit(&self) -> Result<Option<usize>> { + let r = try!(self.parse_human_readable_size_arg("regex-size-limit")); + human_readable_to_usize("regex-size-limit", r) + } + + /// Parses the max-filesize argument option into a byte count. + fn max_filesize(&self) -> Result<Option<u64>> { + self.parse_human_readable_size_arg("max-filesize") + } + /// Returns true if ignore files should be ignored. fn no_ignore(&self) -> bool { self.is_present("no-ignore") @@ -926,6 +965,27 @@ impl QuietMatched { } } +/// Convert the result of a `parse_human_readable_size_arg` call into +/// a `usize`, failing if the type does not fit. +fn human_readable_to_usize( + arg_name: &str, + value: Option<u64>, +) -> Result<Option<usize>> { + use std::usize; + + match value { + None => Ok(None), + Some(v) => { + if v <= usize::MAX as u64 { + Ok(Some(v as usize)) + } else { + let msg = format!("number too large for {}", arg_name); + Err(From::from(msg)) + } + } + } +} + /// Returns true if and only if stdin is deemed searchable. #[cfg(unix)] fn stdin_is_readable() -> bool { |