summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG.md2
-rw-r--r--crates/printer/src/standard.rs74
-rw-r--r--crates/searcher/src/searcher/glue.rs23
-rw-r--r--tests/json.rs17
-rw-r--r--tests/regression.rs9
5 files changed, 108 insertions, 17 deletions
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3d147752..900162bf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -62,6 +62,8 @@ Bug fixes:
* [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277):
Document cygwin path translation behavior in the FAQ.
+* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311):
+ Fix multi-line bug where a search & replace for `\n` didn't work as expected.
* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642):
Fixes a bug where using `-m` and `-A` printed more matches than the limit.
* [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703):
diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs
index 7657e41d..e7fe7c38 100644
--- a/crates/printer/src/standard.rs
+++ b/crates/printer/src/standard.rs
@@ -3224,6 +3224,80 @@ Holmeses, success in the province of detective work must always
assert_eq_printed!(expected, got);
}
+ // This is a somewhat weird test that checks the behavior of attempting
+ // to replace a line terminator with something else.
+ //
+ // See: https://github.com/BurntSushi/ripgrep/issues/1311
+ #[test]
+ fn replacement_multi_line() {
+ let matcher = RegexMatcher::new(r"\n").unwrap();
+ let mut printer = StandardBuilder::new()
+ .replacement(Some(b"?".to_vec()))
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .line_number(true)
+ .multi_line(true)
+ .build()
+ .search_reader(
+ &matcher,
+ "hello\nworld\n".as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "1:hello?world?\n";
+ assert_eq_printed!(expected, got);
+ }
+
+ #[test]
+ fn replacement_multi_line_diff_line_term() {
+ let matcher = RegexMatcherBuilder::new()
+ .line_terminator(Some(b'\x00'))
+ .build(r"\n")
+ .unwrap();
+ let mut printer = StandardBuilder::new()
+ .replacement(Some(b"?".to_vec()))
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .line_terminator(LineTerminator::byte(b'\x00'))
+ .line_number(true)
+ .multi_line(true)
+ .build()
+ .search_reader(
+ &matcher,
+ "hello\nworld\n".as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "1:hello?world?\x00";
+ assert_eq_printed!(expected, got);
+ }
+
+ #[test]
+ fn replacement_multi_line_combine_lines() {
+ let matcher = RegexMatcher::new(r"\n(.)?").unwrap();
+ let mut printer = StandardBuilder::new()
+ .replacement(Some(b"?$1".to_vec()))
+ .build(NoColor::new(vec![]));
+ SearcherBuilder::new()
+ .line_number(true)
+ .multi_line(true)
+ .build()
+ .search_reader(
+ &matcher,
+ "hello\nworld\n".as_bytes(),
+ printer.sink(&matcher),
+ )
+ .unwrap();
+
+ let got = printer_contents(&mut printer);
+ let expected = "1:hello?world?\n";
+ assert_eq_printed!(expected, got);
+ }
+
#[test]
fn replacement_max_columns() {
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs
index e73376a8..7d678d70 100644
--- a/crates/searcher/src/searcher/glue.rs
+++ b/crates/searcher/src/searcher/glue.rs
@@ -226,10 +226,19 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
}
Some(last_match) => {
// If the lines in the previous match overlap with the lines
- // in this match, then simply grow the match and move on.
- // This happens when the next match begins on the same line
- // that the last match ends on.
- if last_match.end() > line.start() {
+ // in this match, then simply grow the match and move on. This
+ // happens when the next match begins on the same line that the
+ // last match ends on.
+ //
+ // Note that we do not technically require strict overlap here.
+ // Instead, we only require that the lines are adjacent. This
+ // provides larger blocks of lines to the printer, and results
+ // in overall better behavior with respect to how replacements
+ // are handled.
+ //
+ // See: https://github.com/BurntSushi/ripgrep/issues/1311
+ // And also the associated commit fixing #1311.
+ if last_match.end() >= line.start() {
self.last_match = Some(last_match.with_end(line.end()));
Ok(true)
} else {
@@ -714,21 +723,23 @@ d
haystack.push_str("zzz\n");
}
haystack.push_str("a\n");
+ haystack.push_str("zzz\n");
haystack.push_str("a\x00a\n");
+ haystack.push_str("zzz\n");
haystack.push_str("a\n");
// The line buffered searcher has slightly different semantics here.
// Namely, it will *always* detect binary data in the current buffer
// before searching it. Thus, the total number of bytes searched is
// smaller than below.
- let exp = "0:a\n\nbyte count:262146\nbinary offset:262149\n";
+ let exp = "0:a\n\nbyte count:262146\nbinary offset:262153\n";
// In contrast, the slice readers (for multi line as well) will only
// look for binary data in the initial chunk of bytes. After that
// point, it only looks for binary data in matches. Note though that
// the binary offset remains the same. (See the binary4 test for a case
// where the offset is explicitly different.)
let exp_slice =
- "0:a\n262146:a\n\nbyte count:262149\nbinary offset:262149\n";
+ "0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n";
SearcherTester::new(&haystack, "a")
.binary_detection(BinaryDetection::quit(0))
diff --git a/tests/json.rs b/tests/json.rs
index 477b36df..97d8e719 100644
--- a/tests/json.rs
+++ b/tests/json.rs
@@ -323,24 +323,19 @@ rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| {
// Check without --crlf flag.
let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout());
- assert_eq!(msgs.len(), 5);
+ assert_eq!(msgs.len(), 4);
let m = msgs[1].unwrap_match();
- assert_eq!(m.lines, Data::text("test\r\n"));
- assert_eq!(m.submatches[0].m, Data::text("\n"));
-
- let m = msgs[2].unwrap_match();
- assert_eq!(m.lines, Data::text("\n"));
+ assert_eq!(m.lines, Data::text("test\r\n\n"));
assert_eq!(m.submatches[0].m, Data::text("\n"));
+ assert_eq!(m.submatches[1].m, Data::text("\n"));
// Now check with --crlf flag.
let msgs = json_decode(&cmd.arg("--crlf").stdout());
+ assert_eq!(msgs.len(), 4);
let m = msgs[1].unwrap_match();
- assert_eq!(m.lines, Data::text("test\r\n"));
- assert_eq!(m.submatches[0].m, Data::text("\n"));
-
- let m = msgs[2].unwrap_match();
- assert_eq!(m.lines, Data::text("\n"));
+ assert_eq!(m.lines, Data::text("test\r\n\n"));
assert_eq!(m.submatches[0].m, Data::text("\n"));
+ assert_eq!(m.submatches[1].m, Data::text("\n"));
});
diff --git a/tests/regression.rs b/tests/regression.rs
index 203ac140..94e62969 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -744,6 +744,15 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| {
eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout());
});
+// See: https://github.com/BurntSushi/ripgrep/issues/1311
+rgtest!(r1311_multi_line_term_replace, |dir: Dir, mut cmd: TestCommand| {
+ dir.create("input", "hello\nworld\n");
+ eqnice!(
+ "1:hello?world?\n",
+ cmd.args(&["-U", "-r?", "-n", "\n", "input"]).stdout()
+ );
+});
+
// See: https://github.com/BurntSushi/ripgrep/issues/1319
rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| {
dir.create("input", "CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC");