summaryrefslogtreecommitdiffstats
path: root/crates/printer/src/util.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/printer/src/util.rs')
-rw-r--r--crates/printer/src/util.rs100
1 files changed, 86 insertions, 14 deletions
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 3948d970..37e56529 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -7,11 +7,13 @@ use std::time;
use bstr::{ByteSlice, ByteVec};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
- LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
+ LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
};
#[cfg(feature = "serde1")]
use serde::{Serialize, Serializer};
+use MAX_LOOK_AHEAD;
+
/// A type for handling replacements while amortizing allocation.
pub struct Replacer<M: Matcher> {
space: Option<Space<M>>,
@@ -52,10 +54,22 @@ impl<M: Matcher> Replacer<M> {
/// This can fail if the underlying matcher reports an error.
pub fn replace_all<'a>(
&'a mut self,
+ searcher: &Searcher,
matcher: &M,
- subject: &[u8],
+ mut subject: &[u8],
+ range: std::ops::Range<usize>,
replacement: &[u8],
) -> io::Result<()> {
+ // See the giant comment in 'find_iter_at_in_context' below for why we
+ // do this dance.
+ let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+ if is_multi_line {
+ if subject[range.end..].len() >= MAX_LOOK_AHEAD {
+ subject = &subject[..range.end + MAX_LOOK_AHEAD];
+ }
+ } else {
+ subject = &subject[..range.end];
+ }
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
@@ -63,18 +77,24 @@ impl<M: Matcher> Replacer<M> {
matches.clear();
matcher
- .replace_with_captures(subject, caps, dst, |caps, dst| {
- let start = dst.len();
- caps.interpolate(
- |name| matcher.capture_index(name),
- subject,
- replacement,
- dst,
- );
- let end = dst.len();
- matches.push(Match::new(start, end));
- true
- })
+ .replace_with_captures_at(
+ subject,
+ range.start,
+ caps,
+ dst,
+ |caps, dst| {
+ let start = dst.len();
+ caps.interpolate(
+ |name| matcher.capture_index(name),
+ subject,
+ replacement,
+ dst,
+ );
+ let end = dst.len();
+ matches.push(Match::new(start, end));
+ true
+ },
+ )
.map_err(io::Error::error_message)?;
}
Ok(())
@@ -357,3 +377,55 @@ pub fn trim_ascii_prefix(
.count();
range.with_start(range.start() + count)
}
+
+pub fn find_iter_at_in_context<M, F>(
+ searcher: &Searcher,
+ matcher: M,
+ mut bytes: &[u8],
+ range: std::ops::Range<usize>,
+ mut matched: F,
+) -> io::Result<()>
+where
+ M: Matcher,
+ F: FnMut(Match) -> bool,
+{
+ // This strange dance is to account for the possibility of look-ahead in
+ // the regex. The problem here is that mat.bytes() doesn't include the
+ // lines beyond the match boundaries in mulit-line mode, which means that
+ // when we try to rediscover the full set of matches here, the regex may no
+ // longer match if it required some look-ahead beyond the matching lines.
+ //
+ // PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
+ // bound of the search. So we kludge it and let the regex engine search the
+ // rest of the buffer... But to avoid things getting too crazy, we cap the
+ // buffer.
+ //
+ // If it weren't for multi-line mode, then none of this would be needed.
+ // Alternatively, if we refactored the grep interfaces to pass along the
+ // full set of matches (if available) from the searcher, then that might
+ // also help here. But that winds up paying an upfront unavoidable cost for
+ // the case where matches don't need to be counted. So then you'd have to
+ // introduce a way to pass along matches conditionally, only when needed.
+ // Yikes.
+ //
+ // Maybe the bigger picture thing here is that the searcher should be
+ // responsible for finding matches when necessary, and the printer
+ // shouldn't be involved in this business in the first place. Sigh. Live
+ // and learn. Abstraction boundaries are hard.
+ let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+ if is_multi_line {
+ if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
+ bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
+ }
+ } else {
+ bytes = &bytes[..range.end];
+ }
+ matcher
+ .find_iter_at(bytes, range.start, |m| {
+ if m.start() >= range.end {
+ return false;
+ }
+ matched(m)
+ })
+ .map_err(io::Error::error_message)
+}