summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Edoardo Pirovano <edoardo.pirovano@gmail.com> 2021-07-07 17:50:23 +0100
committer: Andrew Gallant <jamslam@gmail.com> 2023-07-08 18:52:42 -0400
commit: 6d95c130d5fb56a8641092a4808f33640bfa935c (patch)
tree: 6948223cf4f179cfcdc8a465dad2ed7076e02c9f
parent: 4782ebd5e0773465ba2c32a328250253c2b55779 (diff)
cli: add --stop-on-nonmatch flag
This causes ripgrep to stop searching an individual file once it encounters a non-matching line, but only after it has already found a matching line in that file. Fixes #1790, Closes #1930
-rw-r--r-- CHANGELOG.md 2
-rw-r--r-- complete/_rg 1
-rw-r--r-- crates/core/app.rs 25
-rw-r--r-- crates/core/args.rs 3
-rw-r--r-- crates/searcher/src/searcher/core.rs 49
-rw-r--r-- crates/searcher/src/searcher/mod.rs 24
-rw-r--r-- tests/feature.rs 7
7 files changed, 99 insertions, 12 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index baa58b5c..4dda5eab 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ Unreleased changes. Release notes have not yet been written.
Feature enhancements:
* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V
+* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790):
+ Add new `--stop-on-nonmatch` flag.
* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195):
When `extra-verbose` mode is enabled in zsh, show extra file type info.
* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409):
diff --git a/complete/_rg b/complete/_rg
index 0af8d7af..657648ca 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -319,6 +319,7 @@ _rg() {
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
'*'{-u,--unrestricted}'[reduce level of "smart" searching]'
+ '--stop-on-nonmatch[stop on first non-matching line after a matching one]'
+ operand # Operands
'(--files --type-list file regexp)1: :_guard "^-*" pattern'
diff --git a/crates/core/app.rs b/crates/core/app.rs
index d38bf986..875c880b 100644
--- a/crates/core/app.rs
+++ b/crates/core/app.rs
@@ -632,6 +632,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_sort(&mut args);
flag_sortr(&mut args);
flag_stats(&mut args);
+ flag_stop_on_nonmatch(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_trim(&mut args);
@@ -1926,13 +1927,16 @@ Nevertheless, if you only care about matches spanning at most one line, then it
is always better to disable multiline mode.
This flag can be disabled with --no-multiline.
+
+This overrides the --stop-on-nonmatch flag.
"
);
let arg = RGArg::switch("multiline")
.short("U")
.help(SHORT)
.long_help(LONG)
- .overrides("no-multiline");
+ .overrides("no-multiline")
+ .overrides("stop-on-nonmatch");
args.push(arg);
let arg = RGArg::switch("no-multiline").hidden().overrides("multiline");
@@ -2854,6 +2858,25 @@ This flag can be disabled with --no-stats.
args.push(arg);
}
+fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
+ const SHORT: &str = "Stop searching after a non-match.";
+ const LONG: &str = long!(
+ "\
+Enabling this option will cause ripgrep to stop reading a file once it
+encounters a non-matching line after it has encountered a matching line.
+This is useful if it is expected that all matches in a given file will be on
+sequential lines, for example due to the lines being sorted.
+
+This overrides the -U/--multiline flag.
+"
+ );
+ let arg = RGArg::switch("stop-on-nonmatch")
+ .help(SHORT)
+ .long_help(LONG)
+ .overrides("multiline");
+ args.push(arg);
+}
+
fn flag_text(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search binary files as if they were text.";
const LONG: &str = long!(
diff --git a/crates/core/args.rs b/crates/core/args.rs
index ad2ee2e8..97347755 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -821,7 +821,8 @@ impl ArgMatches {
.before_context(ctx_before)
.after_context(ctx_after)
.passthru(self.is_present("passthru"))
- .memory_map(self.mmap_choice(paths));
+ .memory_map(self.mmap_choice(paths))
+ .stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
match self.encoding()? {
EncodingMode::Some(enc) => {
builder.encoding(Some(enc));
diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs
index 7d6ccd66..95b4ba6a 100644
--- a/crates/searcher/src/searcher/core.rs
+++ b/crates/searcher/src/searcher/core.rs
@@ -10,6 +10,12 @@ use crate::sink::{
};
use grep_matcher::{LineMatchKind, Matcher};
+enum FastMatchResult {
+ Continue,
+ Stop,
+ SwitchToSlow,
+}
+
#[derive(Debug)]
pub struct Core<'s, M: 's, S> {
config: &'s Config,
@@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> {
last_line_visited: usize,
after_context_left: usize,
has_sunk: bool,
+ has_matched: bool,
}
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
@@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
last_line_visited: 0,
after_context_left: 0,
has_sunk: false,
+ has_matched: false,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
@@ -109,7 +117,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
- self.match_by_line_fast(buf)
+ match self.match_by_line_fast(buf)? {
+ FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
+ FastMatchResult::Continue => Ok(true),
+ FastMatchResult::Stop => Ok(false),
+ }
} else {
self.match_by_line_slow(buf)
}
@@ -270,7 +282,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
};
self.set_pos(line.end());
- if matched != self.config.invert_match {
+ let success = matched != self.config.invert_match;
+ if success {
+ self.has_matched = true;
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
@@ -286,40 +300,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
return Ok(false);
}
}
+ if self.config.stop_on_nonmatch && !success && self.has_matched {
+ return Ok(false);
+ }
}
Ok(true)
}
- fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
- debug_assert!(!self.config.passthru);
+ fn match_by_line_fast(
+ &mut self,
+ buf: &[u8],
+ ) -> Result<FastMatchResult, S::Error> {
+ use FastMatchResult::*;
+ debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() {
+ if self.config.stop_on_nonmatch && self.has_matched {
+ return Ok(SwitchToSlow);
+ }
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
- return Ok(false);
+ return Ok(Stop);
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
+ self.has_matched = true;
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
- return Ok(false);
+ return Ok(Stop);
}
if !self.before_context_by_line(buf, line.start())? {
- return Ok(false);
+ return Ok(Stop);
}
}
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
- return Ok(false);
+ return Ok(Stop);
}
} else {
break;
}
}
if !self.after_context_by_line(buf, buf.len())? {
- return Ok(false);
+ return Ok(Stop);
}
self.set_pos(buf.len());
- Ok(true)
+ Ok(Continue)
}
#[inline(always)]
@@ -344,6 +369,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if invert_match.is_empty() {
return Ok(true);
}
+ self.has_matched = true;
if !self.after_context_by_line(buf, invert_match.start())? {
return Ok(false);
}
@@ -577,6 +603,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.config.passthru {
return false;
}
+ if self.config.stop_on_nonmatch && self.has_matched {
+ return false;
+ }
if let Some(line_term) = self.matcher.line_terminator() {
if line_term == self.config.line_term {
return true;
diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs
index 3bd939bb..9b6c9bd4 100644
--- a/crates/searcher/src/searcher/mod.rs
+++ b/crates/searcher/src/searcher/mod.rs
@@ -173,6 +173,9 @@ pub struct Config {
encoding: Option<Encoding>,
/// Whether to do automatic transcoding based on a BOM or not.
bom_sniffing: bool,
+ /// Whether to stop searching when a non-matching line is found after a
+ /// matching line.
+ stop_on_nonmatch: bool,
}
impl Default for Config {
@@ -190,6 +193,7 @@ impl Default for Config {
multi_line: false,
encoding: None,
bom_sniffing: true,
+ stop_on_nonmatch: false,
}
}
}
@@ -555,6 +559,19 @@ impl SearcherBuilder {
self.config.bom_sniffing = yes;
self
}
+
+ /// Stop searching a file when a non-matching line is found after a
+ /// matching line.
+ ///
+ /// This is useful for searching sorted files where it is expected that all
+ /// the matches will be on adjacent lines.
+ pub fn stop_on_nonmatch(
+ &mut self,
+ stop_on_nonmatch: bool,
+ ) -> &mut SearcherBuilder {
+ self.config.stop_on_nonmatch = stop_on_nonmatch;
+ self
+ }
}
/// A searcher executes searches over a haystack and writes results to a caller
@@ -838,6 +855,13 @@ impl Searcher {
self.config.multi_line
}
+ /// Returns true if and only if this searcher is configured to stop when it
+ /// finds a non-matching line after a matching one.
+ #[inline]
+ pub fn stop_on_nonmatch(&self) -> bool {
+ self.config.stop_on_nonmatch
+ }
+
/// Returns true if and only if this searcher will choose a multi-line
/// strategy given the provided matcher.
///
diff --git a/tests/feature.rs b/tests/feature.rs
index 8283a1bb..6d4d1947 100644
--- a/tests/feature.rs
+++ b/tests/feature.rs
@@ -992,3 +992,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "δ");
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
});
+
+// See: https://github.com/BurntSushi/ripgrep/issues/1790
+rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
+ dir.create("test", "line1\nline2\nline3\nline4\nline5");
+ cmd.args(&["--stop-on-nonmatch", "[235]"]);
+ eqnice!("test:line2\ntest:line3\n", cmd.stdout());
+});