summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Andrew Gallant <jamslam@gmail.com> 2019-04-14 17:39:37 -0400
committer Andrew Gallant <jamslam@gmail.com> 2019-04-14 19:29:27 -0400
commit 967e7ad0de3e657e8f81c1384a48ca1eea4edde7 (patch)
tree cabe8250acf34508fc60724104fa05bbcd257295
parent 9952ba2068bd0f9baf74a2094d0d1db493b71180 (diff)
ripgrep: add --auto-hybrid-regex flag
This flag, when set, will automatically dispatch to PCRE2 if the given regex cannot be compiled by Rust's regex engine. If both engines fail to compile the regex, then both errors are surfaced. Closes #1155
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- complete/_rg 4
-rw-r--r-- src/app.rs 53
-rw-r--r-- src/args.rs 19
-rw-r--r-- tests/feature.rs 15
5 files changed, 91 insertions, 2 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f36245f3..f80f460a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,8 @@ Feature enhancements:
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
+* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
+ Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
ripgrep's exit status logic should now match GNU grep. See updated man page.
* [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):
diff --git a/complete/_rg b/complete/_rg
index f26a688d..ac3a52a1 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -112,6 +112,10 @@ _rg() {
'--hidden[search hidden files and directories]'
$no"--no-hidden[don't search hidden files and directories]"
+ + '(hybrid)' # hybrid regex options
+ '--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
+ $no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
+
+ '(ignore)' # Ignore-file options
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
diff --git a/src/app.rs b/src/app.rs
index b102d7cd..7ec54118 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -547,6 +547,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
// flags are hidden and merely mentioned in the docs of the corresponding
// "positive" flag.
flag_after_context(&mut args);
+ flag_auto_hybrid_regex(&mut args);
flag_before_context(&mut args);
flag_binary(&mut args);
flag_block_buffered(&mut args);
@@ -683,6 +684,50 @@ This overrides the --context flag.
args.push(arg);
}
+fn flag_auto_hybrid_regex(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Dynamically use PCRE2 if necessary.";
+ const LONG: &str = long!("\
+When this flag is used, ripgrep will dynamically choose between supported regex
+engines depending on the features used in a pattern. When ripgrep chooses a
+regex engine, it applies that choice for every regex provided to ripgrep (e.g.,
+via multiple -e/--regexp or -f/--file flags).
+
+As an example of how this flag might behave, ripgrep will attempt to use
+its default finite automata based regex engine whenever the pattern can be
+successfully compiled with that regex engine. If PCRE2 is enabled and if the
+pattern given could not be compiled with the default regex engine, then PCRE2
+will be automatically used for searching. If PCRE2 isn't available, then this
+flag has no effect because there is only one regex engine to choose from.
+
+In the future, ripgrep may adjust its heuristics for how it decides which
+regex engine to use. In general, the heuristics will be limited to a static
+analysis of the patterns, and not to any specific runtime behavior observed
+while searching files.
+
+The primary downside of using this flag is that it may not always be obvious
+which regex engine ripgrep uses, and thus, the match semantics or performance
+profile of ripgrep may subtly and unexpectedly change. However, in many cases,
+all regex engines will agree on what constitutes a match and it can be nice
+to transparently support more advanced regex features like look-around and
+backreferences without explicitly needing to enable them.
+
+This flag can be disabled with --no-auto-hybrid-regex.
+");
+ let arg = RGArg::switch("auto-hybrid-regex")
+ .help(SHORT).long_help(LONG)
+ .overrides("no-auto-hybrid-regex")
+ .overrides("pcre2")
+ .overrides("no-pcre2");
+ args.push(arg);
+
+ let arg = RGArg::switch("no-auto-hybrid-regex")
+ .hidden()
+ .overrides("auto-hybrid-regex")
+ .overrides("pcre2")
+ .overrides("no-pcre2");
+ args.push(arg);
+}
+
fn flag_before_context(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show NUM lines before each match.";
const LONG: &str = long!("\
@@ -1938,12 +1983,16 @@ This flag can be disabled with --no-pcre2.
");
let arg = RGArg::switch("pcre2").short("P")
.help(SHORT).long_help(LONG)
- .overrides("no-pcre2");
+ .overrides("no-pcre2")
+ .overrides("auto-hybrid-regex")
+ .overrides("no-auto-hybrid-regex");
args.push(arg);
let arg = RGArg::switch("no-pcre2")
.hidden()
- .overrides("pcre2");
+ .overrides("pcre2")
+ .overrides("auto-hybrid-regex")
+ .overrides("no-auto-hybrid-regex");
args.push(arg);
}
diff --git a/src/args.rs b/src/args.rs
index 389de1dd..80693da8 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -599,6 +599,25 @@ impl ArgMatches {
if self.is_present("pcre2") {
let matcher = self.matcher_pcre2(patterns)?;
Ok(PatternMatcher::PCRE2(matcher))
+ } else if self.is_present("auto-hybrid-regex") {
+ let rust_err = match self.matcher_rust(patterns) {
+ Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)),
+ Err(err) => err,
+ };
+ log::debug!(
+ "error building Rust regex in hybrid mode:\n{}", rust_err,
+ );
+ let pcre_err = match self.matcher_pcre2(patterns) {
+ Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)),
+ Err(err) => err,
+ };
+ Err(From::from(format!(
+ "regex could not be compiled with either the default regex \
+ engine or with PCRE2.\n\n\
+ default regex engine error:\n{}\n{}\n{}\n\n\
+ PCRE2 regex engine error:\n{}",
+ "~".repeat(79), rust_err, "~".repeat(79), pcre_err,
+ )))
} else {
let matcher = match self.matcher_rust(patterns) {
Ok(matcher) => matcher,
diff --git a/tests/feature.rs b/tests/feature.rs
index 6ee2bf87..be9f4bec 100644
--- a/tests/feature.rs
+++ b/tests/feature.rs
@@ -681,6 +681,21 @@ rgtest!(f1138_no_ignore_dot, |dir: Dir, mut cmd: TestCommand| {
eqnice!("bar\n", cmd.arg("--ignore-file").arg(".fzf-ignore").stdout());
});
+// See: https://github.com/BurntSushi/ripgrep/issues/1155
+rgtest!(f1155_auto_hybrid_regex, |dir: Dir, mut cmd: TestCommand| {
+ // No sense in testing a hybrid regex engine with only one engine!
+ if !dir.is_pcre2() {
+ return;
+ }
+
+ dir.create("sherlock", SHERLOCK);
+ cmd.arg("--no-pcre2").arg("--auto-hybrid-regex").arg(r"(?<=the )Sherlock");
+
+ let expected = "\
+sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
+";
+ eqnice!(expected, cmd.stdout());
+});
// See: https://github.com/BurntSushi/ripgrep/issues/1207
//