diff options
Diffstat (limited to 'crates/printer/src/util.rs')
-rw-r--r-- | crates/printer/src/util.rs | 100 |
1 files changed, 86 insertions, 14 deletions
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 3948d970..37e56529 100644 --- a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -7,11 +7,13 @@ use std::time; use bstr::{ByteSlice, ByteVec}; use grep_matcher::{Captures, LineTerminator, Match, Matcher}; use grep_searcher::{ - LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch, + LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch, }; #[cfg(feature = "serde1")] use serde::{Serialize, Serializer}; +use MAX_LOOK_AHEAD; + /// A type for handling replacements while amortizing allocation. pub struct Replacer<M: Matcher> { space: Option<Space<M>>, @@ -52,10 +54,22 @@ impl<M: Matcher> Replacer<M> { /// This can fail if the underlying matcher reports an error. pub fn replace_all<'a>( &'a mut self, + searcher: &Searcher, matcher: &M, - subject: &[u8], + mut subject: &[u8], + range: std::ops::Range<usize>, replacement: &[u8], ) -> io::Result<()> { + // See the giant comment in 'find_iter_at_in_context' below for why we + // do this dance. 
+ let is_multi_line = searcher.multi_line_with_matcher(&matcher); + if is_multi_line { + if subject[range.end..].len() >= MAX_LOOK_AHEAD { + subject = &subject[..range.end + MAX_LOOK_AHEAD]; + } + } else { + subject = &subject[..range.end]; + } { let &mut Space { ref mut dst, ref mut caps, ref mut matches } = self.allocate(matcher)?; @@ -63,18 +77,24 @@ impl<M: Matcher> Replacer<M> { matches.clear(); matcher - .replace_with_captures(subject, caps, dst, |caps, dst| { - let start = dst.len(); - caps.interpolate( - |name| matcher.capture_index(name), - subject, - replacement, - dst, - ); - let end = dst.len(); - matches.push(Match::new(start, end)); - true - }) + .replace_with_captures_at( + subject, + range.start, + caps, + dst, + |caps, dst| { + let start = dst.len(); + caps.interpolate( + |name| matcher.capture_index(name), + subject, + replacement, + dst, + ); + let end = dst.len(); + matches.push(Match::new(start, end)); + true + }, + ) .map_err(io::Error::error_message)?; } Ok(()) @@ -357,3 +377,55 @@ pub fn trim_ascii_prefix( .count(); range.with_start(range.start() + count) } + +pub fn find_iter_at_in_context<M, F>( + searcher: &Searcher, + matcher: M, + mut bytes: &[u8], + range: std::ops::Range<usize>, + mut matched: F, +) -> io::Result<()> +where + M: Matcher, + F: FnMut(Match) -> bool, +{ + // This strange dance is to account for the possibility of look-ahead in + // the regex. The problem here is that mat.bytes() doesn't include the + // lines beyond the match boundaries in multi-line mode, which means that + // when we try to rediscover the full set of matches here, the regex may no + // longer match if it required some look-ahead beyond the matching lines. + // + // PCRE2 (and the grep-matcher interfaces) has no way of specifying an end + // bound of the search. So we kludge it and let the regex engine search the + // rest of the buffer... But to avoid things getting too crazy, we cap the + // buffer. 
+ // + // If it weren't for multi-line mode, then none of this would be needed. + // Alternatively, if we refactored the grep interfaces to pass along the + // full set of matches (if available) from the searcher, then that might + // also help here. But that winds up paying an upfront unavoidable cost for + // the case where matches don't need to be counted. So then you'd have to + // introduce a way to pass along matches conditionally, only when needed. + // Yikes. + // + // Maybe the bigger picture thing here is that the searcher should be + // responsible for finding matches when necessary, and the printer + // shouldn't be involved in this business in the first place. Sigh. Live + // and learn. Abstraction boundaries are hard. + let is_multi_line = searcher.multi_line_with_matcher(&matcher); + if is_multi_line { + if bytes[range.end..].len() >= MAX_LOOK_AHEAD { + bytes = &bytes[..range.end + MAX_LOOK_AHEAD]; + } + } else { + bytes = &bytes[..range.end]; + } + matcher + .find_iter_at(bytes, range.start, |m| { + if m.start() >= range.end { + return false; + } + matched(m) + }) + .map_err(io::Error::error_message) +} |