diff options
author | Andrew Gallant <jamslam@gmail.com> | 2022-05-11 08:11:47 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2022-05-11 08:27:28 -0400 |
commit | 7822b8069ed3190d76335b99c41c67e7ac31f76d (patch) | |
tree | e39cc69074e28cc2c82bd4dc8e0ac35fb0389b34 /crates/printer/src/util.rs | |
parent | 4dc6c73c5a9203c5a8a89ce2161feca542329812 (diff) |
printer: fix duplicative replacement in multiline mode (branch: ag/fix-multiline-replace-duplicate)
This furthers our kludge of dealing with PCRE2's look-around in the
printer. Because of our bad abstraction boundaries, we added a kludge to
deal with PCRE2 look-around by extending the bytes we search by a fixed
amount to hopefully permit any look-around to operate. But because of
that kludge, we wind up overextending ourselves in some cases and
dragging along those extra bytes.
We had fixed this for simple searching by simply rejecting any matches
past the end point. But we didn't do the same for replacements. So this
commit extends our kludge to replacements.
Thanks to @sonohgong for diagnosing the problem and proposing a fix. I
mostly went with their solution, but added the new replacement routine
as an internal helper rather than a new API in the 'grep-matcher'
crate.
Fixes #2095, Fixes #2208
Diffstat (limited to 'crates/printer/src/util.rs')
-rw-r--r-- | crates/printer/src/util.rs | 70 |
1 files changed, 50 insertions, 20 deletions
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 434deec7..73a29964 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -82,26 +82,26 @@ impl<M: Matcher> Replacer<M> { dst.clear(); matches.clear(); - matcher - .replace_with_captures_at( - subject, - range.start, - caps, - dst, - |caps, dst| { - let start = dst.len(); - caps.interpolate( - |name| matcher.capture_index(name), - subject, - replacement, - dst, - ); - let end = dst.len(); - matches.push(Match::new(start, end)); - true - }, - ) - .map_err(io::Error::error_message)?; + replace_with_captures_in_context( + matcher, + subject, + range.clone(), + caps, + dst, + |caps, dst| { + let start = dst.len(); + caps.interpolate( + |name| matcher.capture_index(name), + subject, + replacement, + dst, + ); + let end = dst.len(); + matches.push(Match::new(start, end)); + true + }, + ) + .map_err(io::Error::error_message)?; } Ok(()) } @@ -458,3 +458,33 @@ pub fn trim_line_terminator( *line = line.with_end(end); } } + +/// Like `Matcher::replace_with_captures_at`, but accepts an end bound. +/// +/// See also: `find_iter_at_in_context` for why we need this. +fn replace_with_captures_in_context<M, F>( + matcher: M, + bytes: &[u8], + range: std::ops::Range<usize>, + caps: &mut M::Captures, + dst: &mut Vec<u8>, + mut append: F, +) -> Result<(), M::Error> +where + M: Matcher, + F: FnMut(&M::Captures, &mut Vec<u8>) -> bool, +{ + let mut last_match = range.start; + matcher.captures_iter_at(bytes, range.start, caps, |caps| { + let m = caps.get(0).unwrap(); + if m.start() >= range.end { + return false; + } + dst.extend(&bytes[last_match..m.start()]); + last_match = m.end(); + append(caps, dst) + })?; + let end = std::cmp::min(bytes.len(), range.end); + dst.extend(&bytes[last_match..end]); + Ok(()) +} |