summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarc Tiehuis <marctiehuis@gmail.com>2017-04-02 09:55:58 +1200
committerAndrew Gallant <jamslam@gmail.com>2017-04-12 18:14:23 -0400
commit66efbad871620fe2dbe675438fdc8d9922e72826 (patch)
treef29d2e1b4da5adb8eb6c94d45c5e239b44030e82
parent1f2a9b03062d14491b04a644409fdb8e62f77711 (diff)
Add dfa-size-limit and regex-size-limit arguments
Fixes #362.
-rw-r--r--src/app.rs22
-rw-r--r--src/args.rs96
-rw-r--r--tests/tests.rs44
3 files changed, 139 insertions, 23 deletions
diff --git a/src/app.rs b/src/app.rs
index b8f25b87..44e49517 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -41,7 +41,9 @@ OPTIONS:
/// in a `build.rs` script to build shell completion files.
pub fn app() -> App<'static, 'static> {
let arg = |name| {
- Arg::with_name(name).help(USAGES[name].short).long_help(USAGES[name].long)
+ Arg::with_name(name)
+ .help(USAGES[name].short)
+ .long_help(USAGES[name].long)
};
let flag = |name| arg(name).long(name);
@@ -53,7 +55,7 @@ pub fn app() -> App<'static, 'static> {
.setting(AppSettings::UnifiedHelpMessage)
.usage(USAGE)
.template(TEMPLATE)
- .help_message("Prints help information. Use --help for more details.")
+ .help_message("Prints help information. Use --help for more details.")
// First, set up primary positional/flag arguments.
.arg(arg("pattern")
.required_unless_one(&[
@@ -114,6 +116,8 @@ pub fn app() -> App<'static, 'static> {
.arg(flag("column"))
.arg(flag("context-separator")
.value_name("SEPARATOR").takes_value(true))
+ .arg(flag("dfa-size-limit")
+ .value_name("NUM+SUFFIX?").takes_value(true))
.arg(flag("debug"))
.arg(flag("file").short("f")
.value_name("FILE").takes_value(true)
@@ -148,6 +152,8 @@ pub fn app() -> App<'static, 'static> {
.arg(flag("path-separator").value_name("SEPARATOR").takes_value(true))
.arg(flag("pretty").short("p"))
.arg(flag("replace").short("r").value_name("ARG").takes_value(true))
+ .arg(flag("regex-size-limit")
+ .value_name("NUM+SUFFIX?").takes_value(true))
.arg(flag("case-sensitive").short("s"))
.arg(flag("smart-case").short("S"))
.arg(flag("sort-files"))
@@ -326,6 +332,13 @@ lazy_static! {
doc!(h, "debug",
"Show debug messages.",
"Show debug messages. Please use this when filing a bug report.");
+ doc!(h, "dfa-size-limit",
+ "The upper size limit of the generated dfa.",
+ "The upper size limit of the generated dfa. The default limit is \
+ 10M. This should only be changed on very large regex inputs \
+ where the (slower) fallback regex engine may otherwise be used. \
+ \n\nThe argument accepts the same size suffixes as allowed in \
+ the 'max-filesize' argument.");
doc!(h, "file",
"Search for patterns from the given file.",
"Search for patterns from the given file, with one pattern per \
@@ -444,6 +457,11 @@ lazy_static! {
Note that the replacement by default replaces each match, and \
NOT the entire line. To replace the entire line, you should \
match the entire line.");
+ doc!(h, "regex-size-limit",
+ "The upper size limit of the compiled regex.",
+ "The upper size limit of the compiled regex. The default limit \
+ is 10M. \n\nThe argument accepts the same size suffixes as \
+ allowed in the 'max-filesize' argument.");
doc!(h, "case-sensitive",
"Search case sensitively.",
"Search case sensitively. This overrides -i/--ignore-case and \
diff --git a/src/args.rs b/src/args.rs
index 34f38792..48c4ad56 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -771,12 +771,18 @@ impl<'a> ArgMatches<'a> {
let casei =
self.is_present("ignore-case")
&& !self.is_present("case-sensitive");
- GrepBuilder::new(&try!(self.pattern()))
+ let mut gb = GrepBuilder::new(&try!(self.pattern()))
.case_smart(smart)
.case_insensitive(casei)
- .line_terminator(b'\n')
- .build()
- .map_err(From::from)
+ .line_terminator(b'\n');
+
+ if let Some(limit) = try!(self.dfa_size_limit()) {
+ gb = gb.dfa_size_limit(limit);
+ }
+ if let Some(limit) = try!(self.regex_size_limit()) {
+ gb = gb.size_limit(limit);
+ }
+ gb.build().map_err(From::from)
}
/// Builds the set of glob overrides from the command line flags.
@@ -807,31 +813,64 @@ impl<'a> ArgMatches<'a> {
btypes.build().map_err(From::from)
}
- /// Parses the max-filesize argument option into a byte count.
- fn max_filesize(&self) -> Result<Option<u64>> {
- use regex::Regex;
-
- let max_filesize = match self.value_of_lossy("max-filesize") {
+ /// Parses an argument of the form `[0-9]+(KMG)?`.
+ ///
+ /// This always returns the result as a type `u64`. This must be converted
+ /// to the appropriate type by the caller.
+ fn parse_human_readable_size_arg(
+ &self,
+ arg_name: &str,
+ ) -> Result<Option<u64>> {
+ let arg_value = match self.value_of_lossy(arg_name) {
Some(x) => x,
None => return Ok(None)
};
+ let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap();
+ let caps = try!(
+ re.captures(&arg_value).ok_or_else(|| {
+ format!("invalid format for {}", arg_name)
+ }));
- let re = Regex::new("^([0-9]+)([KMG])?$").unwrap();
- let caps = try!(re.captures(&max_filesize)
- .ok_or("invalid format for max-filesize argument"));
-
- let value = try!(caps[1].parse::<u64>().map_err(|err|err.to_string()));
+ let value = try!(caps[1].parse::<u64>());
let suffix = caps.get(2).map(|x| x.as_str());
+ let v_10 = value.checked_mul(1024);
+ let v_20 = v_10.and_then(|x| x.checked_mul(1024));
+ let v_30 = v_20.and_then(|x| x.checked_mul(1024));
+
+ let try_suffix = |x: Option<u64>| {
+ if x.is_some() {
+ Ok(x)
+ } else {
+ Err(From::from(format!("number too large for {}", arg_name)))
+ }
+ };
match suffix {
None => Ok(Some(value)),
- Some("K") => Ok(Some(value * 1024)),
- Some("M") => Ok(Some(value * 1024 * 1024)),
- Some("G") => Ok(Some(value * 1024 * 1024 * 1024)),
- _ => Err(From::from("invalid suffix for max-filesize argument"))
+ Some("K") => try_suffix(v_10),
+ Some("M") => try_suffix(v_20),
+ Some("G") => try_suffix(v_30),
+ _ => Err(From::from(format!("invalid suffix for {}", arg_name)))
}
}
+ /// Parse the dfa-size-limit argument option into a byte count.
+ fn dfa_size_limit(&self) -> Result<Option<usize>> {
+ let r = try!(self.parse_human_readable_size_arg("dfa-size-limit"));
+ human_readable_to_usize("dfa-size-limit", r)
+ }
+
+ /// Parse the regex-size-limit argument option into a byte count.
+ fn regex_size_limit(&self) -> Result<Option<usize>> {
+ let r = try!(self.parse_human_readable_size_arg("regex-size-limit"));
+ human_readable_to_usize("regex-size-limit", r)
+ }
+
+ /// Parses the max-filesize argument option into a byte count.
+ fn max_filesize(&self) -> Result<Option<u64>> {
+ self.parse_human_readable_size_arg("max-filesize")
+ }
+
/// Returns true if ignore files should be ignored.
fn no_ignore(&self) -> bool {
self.is_present("no-ignore")
@@ -926,6 +965,27 @@ impl QuietMatched {
}
}
+/// Convert the result of a `parse_human_readable_size_arg` call into
+/// a `usize`, failing if the type does not fit.
+fn human_readable_to_usize(
+ arg_name: &str,
+ value: Option<u64>,
+) -> Result<Option<usize>> {
+ use std::usize;
+
+ match value {
+ None => Ok(None),
+ Some(v) => {
+ if v <= usize::MAX as u64 {
+ Ok(Some(v as usize))
+ } else {
+ let msg = format!("number too large for {}", arg_name);
+ Err(From::from(msg))
+ }
+ }
+ }
+}
+
/// Returns true if and only if stdin is deemed searchable.
#[cfg(unix)]
fn stdin_is_readable() -> bool {
diff --git a/tests/tests.rs b/tests/tests.rs
index 578dbcba..ef332ae7 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -455,7 +455,6 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".",
let expected = "\
foo
";
-
assert_eq!(lines, expected);
});
@@ -470,7 +469,6 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".",
let expected = "\
foo
";
-
assert_eq!(lines, expected);
});
@@ -485,10 +483,19 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".",
let expected = "\
foo
";
-
assert_eq!(lines, expected);
});
+sherlock!(max_filesize_suffix_overflow, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+ wd.remove("sherlock");
+ wd.create_size("foo", 1000000);
+
+ // 2^35 * 2^30 would otherwise overflow
+ cmd.arg("--max-filesize").arg("34359738368G").arg("--files");
+ wd.assert_err(&mut cmd);
+});
+
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);
@@ -1443,6 +1450,37 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "fooZbar:test\n");
});
+// See: https://github.com/BurntSushi/ripgrep/issues/362
+sherlock!(feature_362_dfa_size_limit, r"For\s",
+|wd: WorkDir, mut cmd: Command| {
+ // This should fall back to the nfa engine but should still produce the
+ // expected result.
+ cmd.arg("--dfa-size-limit").arg("10");
+ let lines: String = wd.stdout(&mut cmd);
+ let expected = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+";
+ assert_eq!(lines, expected);
+});
+
+sherlock!(feature_362_exceeds_regex_size_limit, r"[0-9]\w+",
+|wd: WorkDir, mut cmd: Command| {
+ cmd.arg("--regex-size-limit").arg("10K");
+ wd.assert_err(&mut cmd);
+});
+
+#[cfg(target_pointer_width = "32")]
+sherlock!(feature_362_u64_to_narrow_usize_suffix_overflow, "Sherlock", ".",
+|wd: WorkDir, mut cmd: Command| {
+ wd.remove("sherlock");
+ wd.create_size("foo", 1000000);
+
+ // 2^35 * 2^20 is ok for u64, but not for usize
+ cmd.arg("--dfa-size-limit").arg("34359738368M").arg("--files");
+ wd.assert_err(&mut cmd);
+});
+
+
// See: https://github.com/BurntSushi/ripgrep/issues/419
sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {