summaryrefslogtreecommitdiffstats
path: root/crates/printer
diff options
context:
space:
mode:
Diffstat (limited to 'crates/printer')
-rw-r--r--crates/printer/src/json.rs34
-rw-r--r--crates/printer/src/lib.rs11
-rw-r--r--crates/printer/src/standard.rs59
-rw-r--r--crates/printer/src/summary.rs66
-rw-r--r--crates/printer/src/util.rs100
5 files changed, 206 insertions, 64 deletions
diff --git a/crates/printer/src/json.rs b/crates/printer/src/json.rs
index b1a6fa2e..8500e6a1 100644
--- a/crates/printer/src/json.rs
+++ b/crates/printer/src/json.rs
@@ -4,14 +4,14 @@ use std::time::Instant;
use grep_matcher::{Match, Matcher};
use grep_searcher::{
- Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
- SinkMatch,
+ Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
};
use serde_json as json;
use counter::CounterWriter;
use jsont;
use stats::Stats;
+use util::find_iter_at_in_context;
/// The configuration for the JSON printer.
///
@@ -603,7 +603,12 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
/// Execute the matcher over the given bytes and record the match
/// locations if the current configuration demands match granularity.
- fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
+ fn record_matches(
+ &mut self,
+ searcher: &Searcher,
+ bytes: &[u8],
+ range: std::ops::Range<usize>,
+ ) -> io::Result<()> {
self.json.matches.clear();
// If printing requires knowing the location of each individual match,
// then compute and stored those right now for use later. While this
@@ -612,12 +617,17 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
// the extent that it's easy to ensure that we never do more than
// one search to find the matches.
let matches = &mut self.json.matches;
- self.matcher
- .find_iter(bytes, |m| {
- matches.push(m);
+ find_iter_at_in_context(
+ searcher,
+ &self.matcher,
+ bytes,
+ range.clone(),
+ |m| {
+ let (s, e) = (m.start() - range.start, m.end() - range.start);
+ matches.push(Match::new(s, e));
true
- })
- .map_err(io::Error::error_message)?;
+ },
+ )?;
// Don't report empty matches appearing at the end of the bytes.
if !matches.is_empty()
&& matches.last().unwrap().is_empty()
@@ -691,7 +701,11 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.after_context_remaining = searcher.after_context() as u64;
}
- self.record_matches(mat.bytes())?;
+ self.record_matches(
+ searcher,
+ mat.buffer(),
+ mat.bytes_range_in_buffer(),
+ )?;
self.stats.add_matches(self.json.matches.len() as u64);
self.stats.add_matched_lines(mat.lines().count() as u64);
@@ -720,7 +734,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.after_context_remaining.saturating_sub(1);
}
let submatches = if searcher.invert_match() {
- self.record_matches(ctx.bytes())?;
+ self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
} else {
SubMatches::empty()
diff --git a/crates/printer/src/lib.rs b/crates/printer/src/lib.rs
index fa321dcd..abb1087f 100644
--- a/crates/printer/src/lib.rs
+++ b/crates/printer/src/lib.rs
@@ -92,6 +92,17 @@ pub use stats::Stats;
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
pub use util::PrinterPath;
+// The maximum number of bytes to execute a search to account for look-ahead.
+//
+// This is an unfortunate kludge since PCRE2 doesn't provide a way to search
+// a substring of some input while accounting for look-ahead. In theory, we
+// could refactor the various 'grep' interfaces to account for it, but it would
+// be a large change. So for now, we just let PCRE2 go looking a bit for a
+// match without searching the entire rest of the contents.
+//
+// Note that this kludge is only active in multi-line mode.
+const MAX_LOOK_AHEAD: usize = 128;
+
#[macro_use]
mod macros;
diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs
index e7fe7c38..0c853f1b 100644
--- a/crates/printer/src/standard.rs
+++ b/crates/printer/src/standard.rs
@@ -8,15 +8,17 @@ use std::time::Instant;
use bstr::ByteSlice;
use grep_matcher::{Match, Matcher};
use grep_searcher::{
- LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkError,
- SinkFinish, SinkMatch,
+ LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish,
+ SinkMatch,
};
use termcolor::{ColorSpec, NoColor, WriteColor};
use color::ColorSpecs;
use counter::CounterWriter;
use stats::Stats;
-use util::{trim_ascii_prefix, PrinterPath, Replacer, Sunk};
+use util::{
+ find_iter_at_in_context, trim_ascii_prefix, PrinterPath, Replacer, Sunk,
+};
/// The configuration for the standard printer.
///
@@ -682,7 +684,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
/// Execute the matcher over the given bytes and record the match
/// locations if the current configuration demands match granularity.
- fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
+ fn record_matches(
+ &mut self,
+ searcher: &Searcher,
+ bytes: &[u8],
+ range: std::ops::Range<usize>,
+ ) -> io::Result<()> {
self.standard.matches.clear();
if !self.needs_match_granularity {
return Ok(());
@@ -695,16 +702,21 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
// one search to find the matches (well, for replacements, we do one
// additional search to perform the actual replacement).
let matches = &mut self.standard.matches;
- self.matcher
- .find_iter(bytes, |m| {
- matches.push(m);
+ find_iter_at_in_context(
+ searcher,
+ &self.matcher,
+ bytes,
+ range.clone(),
+ |m| {
+ let (s, e) = (m.start() - range.start, m.end() - range.start);
+ matches.push(Match::new(s, e));
true
- })
- .map_err(io::Error::error_message)?;
+ },
+ )?;
// Don't report empty matches appearing at the end of the bytes.
if !matches.is_empty()
&& matches.last().unwrap().is_empty()
- && matches.last().unwrap().start() >= bytes.len()
+ && matches.last().unwrap().start() >= range.end
{
matches.pop().unwrap();
}
@@ -715,14 +727,25 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
/// replacement, lazily allocating memory if necessary.
///
/// To access the result of a replacement, use `replacer.replacement()`.
- fn replace(&mut self, bytes: &[u8]) -> io::Result<()> {
+ fn replace(
+ &mut self,
+ searcher: &Searcher,
+ bytes: &[u8],
+ range: std::ops::Range<usize>,
+ ) -> io::Result<()> {
self.replacer.clear();
if self.standard.config.replacement.is_some() {
let replacement = (*self.standard.config.replacement)
.as_ref()
.map(|r| &*r)
.unwrap();
- self.replacer.replace_all(&self.matcher, bytes, replacement)?;
+ self.replacer.replace_all(
+ searcher,
+ &self.matcher,
+ bytes,
+ range,
+ replacement,
+ )?;
}
Ok(())
}
@@ -777,8 +800,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
self.after_context_remaining = searcher.after_context() as u64;
}
- self.record_matches(mat.bytes())?;
- self.replace(mat.bytes())?;
+ self.record_matches(
+ searcher,
+ mat.buffer(),
+ mat.bytes_range_in_buffer(),
+ )?;
+ self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
if let Some(ref mut stats) = self.stats {
stats.add_matches(self.standard.matches.len() as u64);
@@ -807,8 +834,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
self.after_context_remaining.saturating_sub(1);
}
if searcher.invert_match() {
- self.record_matches(ctx.bytes())?;
- self.replace(ctx.bytes())?;
+ self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
+ self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
}
if searcher.binary_detection().convert_byte().is_some() {
if self.binary_byte_offset.is_some() {
diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs
index 5ce087ef..e062662d 100644
--- a/crates/printer/src/summary.rs
+++ b/crates/printer/src/summary.rs
@@ -11,7 +11,7 @@ use termcolor::{ColorSpec, NoColor, WriteColor};
use color::ColorSpecs;
use counter::CounterWriter;
use stats::Stats;
-use util::PrinterPath;
+use util::{find_iter_at_in_context, PrinterPath};
/// The configuration for the summary printer.
///
@@ -504,6 +504,17 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
self.stats.as_ref()
}
+ /// Returns true if and only if the searcher may report matches over
+ /// multiple lines.
+ ///
+ /// Note that this doesn't just return whether the searcher is in multi
+ /// line mode, but also checks if the mater can match over multiple lines.
+ /// If it can't, then we don't need multi line handling, even if the
+ /// searcher has multi line mode enabled.
+ fn multi_line(&self, searcher: &Searcher) -> bool {
+ searcher.multi_line_with_matcher(&self.matcher)
+ }
+
/// Returns true if this printer should quit.
///
/// This implements the logic for handling quitting after seeing a certain
@@ -579,32 +590,39 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
fn matched(
&mut self,
- _searcher: &Searcher,
+ searcher: &Searcher,
mat: &SinkMatch,
) -> Result<bool, io::Error> {
- self.match_count += 1;
- if let Some(ref mut stats) = self.stats {
- let mut match_count = 0;
- self.matcher
- .find_iter(mat.bytes(), |_| {
- match_count += 1;
+ let is_multi_line = self.multi_line(searcher);
+ let sink_match_count = if self.stats.is_none() && !is_multi_line {
+ 1
+ } else {
+ // This gives us as many bytes as the searcher can offer. This
+ // isn't guaranteed to hold the necessary context to get match
+ // detection correct (because of look-around), but it does in
+ // practice.
+ let buf = mat.buffer();
+ let range = mat.bytes_range_in_buffer();
+ let mut count = 0;
+ find_iter_at_in_context(
+ searcher,
+ &self.matcher,
+ buf,
+ range,
+ |_| {
+ count += 1;
true
- })
- .map_err(io::Error::error_message)?;
- if match_count == 0 {
- // It is possible for the match count to be zero when
- // look-around is used. Since `SinkMatch` won't necessarily
- // contain the look-around in its match span, the search here
- // could fail to find anything.
- //
- // It seems likely that setting match_count=1 here is probably
- // wrong in some cases, but I don't think we can do any
- // better. (Because this printer cannot assume that subsequent
- // contents have been loaded into memory, so we have no way of
- // increasing the search span here.)
- match_count = 1;
- }
- stats.add_matches(match_count);
+ },
+ )?;
+ count
+ };
+ if is_multi_line {
+ self.match_count += sink_match_count;
+ } else {
+ self.match_count += 1;
+ }
+ if let Some(ref mut stats) = self.stats {
+ stats.add_matches(sink_match_count);
stats.add_matched_lines(mat.lines().count() as u64);
} else if self.summary.config.kind.quit_early() {
return Ok(false);
diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs
index 3948d970..37e56529 100644
--- a/crates/printer/src/util.rs
+++ b/crates/printer/src/util.rs
@@ -7,11 +7,13 @@ use std::time;
use bstr::{ByteSlice, ByteVec};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
- LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
+ LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
};
#[cfg(feature = "serde1")]
use serde::{Serialize, Serializer};
+use MAX_LOOK_AHEAD;
+
/// A type for handling replacements while amortizing allocation.
pub struct Replacer<M: Matcher> {
space: Option<Space<M>>,
@@ -52,10 +54,22 @@ impl<M: Matcher> Replacer<M> {
/// This can fail if the underlying matcher reports an error.
pub fn replace_all<'a>(
&'a mut self,
+ searcher: &Searcher,
matcher: &M,
- subject: &[u8],
+ mut subject: &[u8],
+ range: std::ops::Range<usize>,
replacement: &[u8],
) -> io::Result<()> {
+ // See the giant comment in 'find_iter_at_in_context' below for why we
+ // do this dance.
+ let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+ if is_multi_line {
+ if subject[range.end..].len() >= MAX_LOOK_AHEAD {
+ subject = &subject[..range.end + MAX_LOOK_AHEAD];
+ }
+ } else {
+ subject = &subject[..range.end];
+ }
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
@@ -63,18 +77,24 @@ impl<M: Matcher> Replacer<M> {
matches.clear();
matcher
- .replace_with_captures(subject, caps, dst, |caps, dst| {
- let start = dst.len();
- caps.interpolate(
- |name| matcher.capture_index(name),
- subject,
- replacement,
- dst,
- );
- let end = dst.len();
- matches.push(Match::new(start, end));
- true
- })
+ .replace_with_captures_at(
+ subject,
+ range.start,
+ caps,
+ dst,
+ |caps, dst| {
+ let start = dst.len();
+ caps.interpolate(
+ |name| matcher.capture_index(name),
+ subject,
+ replacement,
+ dst,
+ );
+ let end = dst.len();
+ matches.push(Match::new(start, end));
+ true
+ },
+ )
.map_err(io::Error::error_message)?;
}
Ok(())
@@ -357,3 +377,55 @@ pub fn trim_ascii_prefix(
.count();
range.with_start(range.start() + count)
}
+
+pub fn find_iter_at_in_context<M, F>(
+ searcher: &Searcher,
+ matcher: M,
+ mut bytes: &[u8],
+ range: std::ops::Range<usize>,
+ mut matched: F,
+) -> io::Result<()>
+where
+ M: Matcher,
+ F: FnMut(Match) -> bool,
+{
+ // This strange dance is to account for the possibility of look-ahead in
+ // the regex. The problem here is that mat.bytes() doesn't include the
+ // lines beyond the match boundaries in mulit-line mode, which means that
+ // when we try to rediscover the full set of matches here, the regex may no
+ // longer match if it required some look-ahead beyond the matching lines.
+ //
+ // PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
+ // bound of the search. So we kludge it and let the regex engine search the
+ // rest of the buffer... But to avoid things getting too crazy, we cap the
+ // buffer.
+ //
+ // If it weren't for multi-line mode, then none of this would be needed.
+ // Alternatively, if we refactored the grep interfaces to pass along the
+ // full set of matches (if available) from the searcher, then that might
+ // also help here. But that winds up paying an upfront unavoidable cost for
+ // the case where matches don't need to be counted. So then you'd have to
+ // introduce a way to pass along matches conditionally, only when needed.
+ // Yikes.
+ //
+ // Maybe the bigger picture thing here is that the searcher should be
+ // responsible for finding matches when necessary, and the printer
+ // shouldn't be involved in this business in the first place. Sigh. Live
+ // and learn. Abstraction boundaries are hard.
+ let is_multi_line = searcher.multi_line_with_matcher(&matcher);
+ if is_multi_line {
+ if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
+ bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
+ }
+ } else {
+ bytes = &bytes[..range.end];
+ }
+ matcher
+ .find_iter_at(bytes, range.start, |m| {
+ if m.start() >= range.end {
+ return false;
+ }
+ matched(m)
+ })
+ .map_err(io::Error::error_message)
+}