summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2021-05-30 21:36:35 -0400
committerAndrew Gallant <jamslam@gmail.com>2021-05-31 21:51:18 -0400
commitfc31aedcf3cae7a773c94a79775396759af10dd8 (patch)
tree71627aaa0397f1165e2ce214745df6dda2f99ce0
parent578e1992fa21d3612d3e613d8869dbbb16e33cbe (diff)
printer: vimgrep now only prints one line
It turns out that the vimgrep format really only wants one line per match, even when that match spans multiple lines. We continue to support the previous behavior (print all lines in a match) in the `grep-printer` crate. We add a new option to enable the "only print the first line" behavior, and unconditionally enable it in ripgrep. We can do that because the option has no effect in single-line mode, since, well, in that case matches are guaranteed to span one line anyway. Fixes #1866
-rw-r--r--CHANGELOG.md9
-rw-r--r--crates/core/args.rs1
-rw-r--r--crates/printer/src/standard.rs130
-rw-r--r--tests/multiline.rs6
-rw-r--r--tests/regression.rs6
5 files changed, 140 insertions, 12 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f9a0992..3d147752 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@ Unreleased changes. Release notes have not yet been written.
**BREAKING CHANGES**:
+**Binary detection output has changed slightly.**
+
In this release, a small tweak has been made to the output format when a binary
file is detected. Previously, it looked like this:
@@ -17,6 +19,13 @@ Now it looks like this:
FOO: binary file matches (found "\0" byte around offset XXX)
```
+**vimgrep output in multi-line now only prints the first line for each match.**
+
+See [issue 1866](https://github.com/BurntSushi/ripgrep/issues/1866) for more
+discussion on this. Previously, every line in a match was duplicated, even
+when it spanned multiple lines. There are no changes to vimgrep output when
+multi-line mode is disabled.
+
Security fixes:
* [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013):
diff --git a/crates/core/args.rs b/crates/core/args.rs
index caa378bd..ca7d6069 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -777,6 +777,7 @@ impl ArgMatches {
.path(self.with_filename(paths))
.only_matching(self.is_present("only-matching"))
.per_match(self.is_present("vimgrep"))
+ .per_match_one_line(true)
.replacement(self.replacement())
.max_columns(self.max_columns()?)
.max_columns_preview(self.max_columns_preview())
diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs
index 31dc1ad8..7657e41d 100644
--- a/crates/printer/src/standard.rs
+++ b/crates/printer/src/standard.rs
@@ -31,6 +31,7 @@ struct Config {
path: bool,
only_matching: bool,
per_match: bool,
+ per_match_one_line: bool,
replacement: Arc<Option<Vec<u8>>>,
max_columns: Option<u64>,
max_columns_preview: bool,
@@ -55,6 +56,7 @@ impl Default for Config {
path: true,
only_matching: false,
per_match: false,
+ per_match_one_line: false,
replacement: Arc::new(None),
max_columns: None,
max_columns_preview: false,
@@ -219,17 +221,36 @@ impl StandardBuilder {
/// the `column` option, which will show the starting column number for
/// every match on every line.
///
- /// When multi-line mode is enabled, each match and its accompanying lines
- /// are printed. As with single line matches, if a line contains multiple
- /// matches (even if only partially), then that line is printed once for
- /// each match it participates in. In multi-line mode, column numbers only
- /// indicate the start of a match. If a line only continues a match, then
- /// the column number printed is always `1`.
+ /// When multi-line mode is enabled, each match is printed, including every
+ /// line in the match. As with single line matches, if a line contains
+ /// multiple matches (even if only partially), then that line is printed
+ /// once for each match it participates in, assuming it's the first line in
+ /// that match. In multi-line mode, column numbers only indicate the start
+ /// of a match. Subsequent lines in a multi-line match always have a column
+ /// number of `1`.
+ ///
+ /// When a match contains multiple lines, enabling `per_match_one_line`
+ /// will cause only the first line each in match to be printed.
pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder {
self.config.per_match = yes;
self
}
+ /// Print at most one line per match when `per_match` is enabled.
+ ///
+ /// By default, every line in each match found is printed when `per_match`
+ /// is enabled. However, this is sometimes undesirable, e.g., when you
+ /// only ever want one line per match.
+ ///
+ /// This is only applicable when multi-line matching is enabled, since
+ /// otherwise, matches are guaranteed to span one line.
+ ///
+ /// This is disabled by default.
+ pub fn per_match_one_line(&mut self, yes: bool) -> &mut StandardBuilder {
+ self.config.per_match_one_line = yes;
+ self
+ }
+
/// Set the bytes that will be used to replace each occurrence of a match
/// found.
///
@@ -294,9 +315,6 @@ impl StandardBuilder {
/// Column numbers are computed in terms of bytes from the start of the
/// line being printed.
///
- /// For matches that span multiple lines, the column number for each
- /// matching line is in terms of the first matching line.
- ///
/// This is disabled by default.
pub fn column(&mut self, yes: bool) -> &mut StandardBuilder {
self.config.column = yes;
@@ -1139,6 +1157,15 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
}
}
self.write_line_term()?;
+ // It turns out that vimgrep really only wants one line per
+ // match, even when a match spans multiple lines. So when
+ // that option is enabled, we just quit after printing the
+ // first line.
+ //
+ // See: https://github.com/BurntSushi/ripgrep/issues/1866
+ if self.config().per_match_one_line {
+ break;
+ }
}
}
Ok(())
@@ -3059,6 +3086,91 @@ Holmeses, success in the province of detective work must always
}
#[test]
+ fn per_match_multi_line1_only_first_line() {
+ let matcher =
+ RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap();
+ let mut printer = StandardBuilder::new()
+ .per_match(true)
+ .per_match_one_line(true)
+ .column(true)
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .multi_line(true)
+ .line_number(true)
+ .build()
+ .search_reader(
+ &matcher,
+ SHERLOCK.as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "\
+1:9:For the Doctor Watsons of this world, as opposed to the Sherlock
+1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
+3:49:be, to a very large extent, the result of luck. Sherlock Holmes
+";
+ assert_eq_printed!(expected, got);
+ }
+
+ #[test]
+ fn per_match_multi_line2_only_first_line() {
+ let matcher =
+ RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap();
+ let mut printer = StandardBuilder::new()
+ .per_match(true)
+ .per_match_one_line(true)
+ .column(true)
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .multi_line(true)
+ .line_number(true)
+ .build()
+ .search_reader(
+ &matcher,
+ SHERLOCK.as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "\
+1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
+5:12:but Doctor Watson has to have it taken out for him and dusted,
+";
+ assert_eq_printed!(expected, got);
+ }
+
+ #[test]
+ fn per_match_multi_line3_only_first_line() {
+ let matcher =
+ RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap();
+ let mut printer = StandardBuilder::new()
+ .per_match(true)
+ .per_match_one_line(true)
+ .column(true)
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .multi_line(true)
+ .line_number(true)
+ .build()
+ .search_reader(
+ &matcher,
+ SHERLOCK.as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "\
+1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
+2:58:Holmeses, success in the province of detective work must always
+";
+ assert_eq_printed!(expected, got);
+ }
+
+ #[test]
fn replacement_passthru() {
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
let mut printer = StandardBuilder::new()
diff --git a/tests/multiline.rs b/tests/multiline.rs
index eb9cb4d5..d084c96b 100644
--- a/tests/multiline.rs
+++ b/tests/multiline.rs
@@ -64,6 +64,11 @@ rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| {
});
// Tests that --vimgrep works in multiline mode.
+//
+// In particular, we test that only the first line of each match is printed,
+// even when a match spans multiple lines.
+//
+// See: https://github.com/BurntSushi/ripgrep/issues/1866
rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK);
cmd.args(&[
@@ -77,7 +82,6 @@ rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
let expected = "\
sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
-sherlock:2:1:Holmeses, success in the province of detective work must always
sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
";
diff --git a/tests/regression.rs b/tests/regression.rs
index 8c132795..203ac140 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -917,10 +917,12 @@ rgtest!(r1866, |dir: Dir, mut cmd: TestCommand| {
"test",
]);
+ // vimgrep only wants the first line of each match, even when a match
+ // spans multiple lines.
+ //
+ // See: https://github.com/BurntSushi/ripgrep/issues/1866
let expected = "\
test:1:1:foobar
-test:2:1:foobar
-test:3:1:foo quux
test:3:5:foo quux
";
eqnice!(expected, cmd.stdout());