summary | refs | log | tree | commit | diff | stats
path: root/grep-searcher/src
diff options
context:
space:
mode:
Diffstat (limited to 'grep-searcher/src')
-rw-r--r-- grep-searcher/src/line_buffer.rs 8
-rw-r--r-- grep-searcher/src/searcher/core.rs 36
-rw-r--r-- grep-searcher/src/searcher/glue.rs 19
-rw-r--r-- grep-searcher/src/searcher/mod.rs 62
-rw-r--r-- grep-searcher/src/sink.rs 40
5 files changed, 135 insertions, 30 deletions
diff --git a/grep-searcher/src/line_buffer.rs b/grep-searcher/src/line_buffer.rs
index c2e54a9e..cc7dd578 100644
--- a/grep-searcher/src/line_buffer.rs
+++ b/grep-searcher/src/line_buffer.rs
@@ -317,6 +317,14 @@ pub struct LineBuffer {
}
impl LineBuffer {
+ /// Set the binary detection method used on this line buffer.
+ ///
+ /// This permits dynamically changing the binary detection strategy on
+ /// an existing line buffer without needing to create a new one.
+ pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
+ self.config.binary = binary;
+ }
+
/// Reset this buffer, such that it can be used with a new reader.
fn clear(&mut self) {
self.pos = 0;
diff --git a/grep-searcher/src/searcher/core.rs b/grep-searcher/src/searcher/core.rs
index ff2cd18d..dd621bba 100644
--- a/grep-searcher/src/searcher/core.rs
+++ b/grep-searcher/src/searcher/core.rs
@@ -90,6 +90,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink_matched(buf, range)
}
+ pub fn binary_data(
+ &mut self,
+ binary_byte_offset: u64,
+ ) -> Result<bool, S::Error> {
+ self.sink.binary_data(&self.searcher, binary_byte_offset)
+ }
+
pub fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher)
}
@@ -141,19 +148,28 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
- pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
+ pub fn detect_binary(
+ &mut self,
+ buf: &[u8],
+ range: &Range,
+ ) -> Result<bool, S::Error> {
if self.binary_byte_offset.is_some() {
- return true;
+ return Ok(self.config.binary.quit_byte().is_some());
}
let binary_byte = match self.config.binary.0 {
BinaryDetection::Quit(b) => b,
- _ => return false,
+ BinaryDetection::Convert(b) => b,
+ _ => return Ok(false),
};
if let Some(i) = B(&buf[*range]).find_byte(binary_byte) {
- self.binary_byte_offset = Some(range.start() + i);
- true
+ let offset = range.start() + i;
+ self.binary_byte_offset = Some(offset);
+ if !self.binary_data(offset as u64)? {
+ return Ok(true);
+ }
+ Ok(self.config.binary.quit_byte().is_some())
} else {
- false
+ Ok(false)
}
}
@@ -416,7 +432,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
buf: &[u8],
range: &Range,
) -> Result<bool, S::Error> {
- if self.binary && self.detect_binary(buf, range) {
+ if self.binary && self.detect_binary(buf, range)? {
return Ok(false);
}
if !self.sink_break_context(range.start())? {
@@ -448,7 +464,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
buf: &[u8],
range: &Range,
) -> Result<bool, S::Error> {
- if self.binary && self.detect_binary(buf, range) {
+ if self.binary && self.detect_binary(buf, range)? {
return Ok(false);
}
self.count_lines(buf, range.start());
@@ -478,7 +494,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
) -> Result<bool, S::Error> {
assert!(self.after_context_left >= 1);
- if self.binary && self.detect_binary(buf, range) {
+ if self.binary && self.detect_binary(buf, range)? {
return Ok(false);
}
self.count_lines(buf, range.start());
@@ -507,7 +523,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
buf: &[u8],
range: &Range,
) -> Result<bool, S::Error> {
- if self.binary && self.detect_binary(buf, range) {
+ if self.binary && self.detect_binary(buf, range)? {
return Ok(false);
}
self.count_lines(buf, range.start());
diff --git a/grep-searcher/src/searcher/glue.rs b/grep-searcher/src/searcher/glue.rs
index 3a5d4291..4f362dab 100644
--- a/grep-searcher/src/searcher/glue.rs
+++ b/grep-searcher/src/searcher/glue.rs
@@ -51,6 +51,7 @@ where M: Matcher,
fn fill(&mut self) -> Result<bool, S::Error> {
assert!(self.rdr.buffer()[self.core.pos()..].is_empty());
+ let already_binary = self.rdr.binary_byte_offset().is_some();
let old_buf_len = self.rdr.buffer().len();
let consumed = self.core.roll(self.rdr.buffer());
self.rdr.consume(consumed);
@@ -58,7 +59,14 @@ where M: Matcher,
Err(err) => return Err(S::Error::error_io(err)),
Ok(didread) => didread,
};
- if !didread || self.rdr.binary_byte_offset().is_some() {
+ if !already_binary {
+ if let Some(offset) = self.rdr.binary_byte_offset() {
+ if !self.core.binary_data(offset)? {
+ return Ok(false);
+ }
+ }
+ }
+ if !didread || self.should_binary_quit() {
return Ok(false);
}
// If rolling the buffer didn't result in consuming anything and if
@@ -71,6 +79,11 @@ where M: Matcher,
}
Ok(true)
}
+
+ fn should_binary_quit(&self) -> bool {
+ self.rdr.binary_byte_offset().is_some()
+ && self.config.binary.quit_byte().is_some()
+ }
}
#[derive(Debug)]
@@ -103,7 +116,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
DEFAULT_BUFFER_CAPACITY,
);
let binary_range = Range::new(0, binary_upto);
- if !self.core.detect_binary(self.slice, &binary_range) {
+ if !self.core.detect_binary(self.slice, &binary_range)? {
while
!self.slice[self.core.pos()..].is_empty()
&& self.core.match_by_line(self.slice)?
@@ -155,7 +168,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
DEFAULT_BUFFER_CAPACITY,
);
let binary_range = Range::new(0, binary_upto);
- if !self.core.detect_binary(self.slice, &binary_range) {
+ if !self.core.detect_binary(self.slice, &binary_range)? {
let mut keepgoing = true;
while !self.slice[self.core.pos()..].is_empty() && keepgoing {
keepgoing = self.sink()?;
diff --git a/grep-searcher/src/searcher/mod.rs b/grep-searcher/src/searcher/mod.rs
index 729b491b..e20e04a3 100644
--- a/grep-searcher/src/searcher/mod.rs
+++ b/grep-searcher/src/searcher/mod.rs
@@ -75,25 +75,41 @@ impl BinaryDetection {
BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte))
}
- // TODO(burntsushi): Figure out how to make binary conversion work. This
- // permits implementing GNU grep's default behavior, which is to zap NUL
- // bytes but still execute a search (if a match is detected, then GNU grep
- // stops and reports that a match was found but doesn't print the matching
- // line itself).
- //
- // This behavior is pretty simple to implement using the line buffer (and
- // in fact, it is already implemented and tested), since there's a fixed
- // size buffer that we can easily write to. The issue arises when searching
- // a `&[u8]` (whether on the heap or via a memory map), since this isn't
- // something we can easily write to.
-
- /// The given byte is searched in all contents read by the line buffer. If
- /// it occurs, then it is replaced by the line terminator. The line buffer
- /// guarantees that this byte will never be observable by callers.
- #[allow(dead_code)]
- fn convert(binary_byte: u8) -> BinaryDetection {
+ /// Binary detection is performed by looking for the given byte, and
+ /// replacing it with the line terminator configured on the searcher.
+ /// (If the searcher is configured to use `CRLF` as the line terminator,
+ /// then this byte is replaced by just `LF`.)
+ ///
+ /// When searching is performed using a fixed size buffer, then the
+ /// contents of that buffer are always searched for the presence of this
+ /// byte and replaced with the line terminator. In effect, the caller is
+ /// guaranteed to never observe this byte while searching.
+ ///
+ /// When searching is performed with the entire contents mapped into
+ /// memory, the byte is not replaced, though the sink may still be notified.
+ pub fn convert(binary_byte: u8) -> BinaryDetection {
BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte))
}
+
+ /// If this binary detection uses the "quit" strategy, then this returns
+ /// the byte that will cause a search to quit. In any other case, this
+ /// returns `None`.
+ pub fn quit_byte(&self) -> Option<u8> {
+ match self.0 {
+ line_buffer::BinaryDetection::Quit(b) => Some(b),
+ _ => None,
+ }
+ }
+
+ /// If this binary detection uses the "convert" strategy, then this returns
+ /// the byte that will be replaced by the line terminator. In any other
+ /// case, this returns `None`.
+ pub fn convert_byte(&self) -> Option<u8> {
+ match self.0 {
+ line_buffer::BinaryDetection::Convert(b) => Some(b),
+ _ => None,
+ }
+ }
}
/// An encoding to use when searching.
@@ -739,6 +755,12 @@ impl Searcher {
}
}
+ /// Set the binary detection method used on this searcher.
+ pub fn set_binary_detection(&mut self, detection: BinaryDetection) {
+ self.config.binary = detection.clone();
+ self.line_buffer.borrow_mut().set_binary_detection(detection.0);
+ }
+
/// Check that the searcher's configuration and the matcher are consistent
/// with each other.
fn check_config<M: Matcher>(&self, matcher: M) -> Result<(), ConfigError> {
@@ -778,6 +800,12 @@ impl Searcher {
self.config.line_term
}
+ /// Returns the type of binary detection configured on this searcher.
+ #[inline]
+ pub fn binary_detection(&self) -> &BinaryDetection {
+ &self.config.binary
+ }
+
/// Returns true if and only if this searcher is configured to invert its
/// search results. That is, matching lines are lines that do **not** match
/// the searcher's matcher.
diff --git a/grep-searcher/src/sink.rs b/grep-searcher/src/sink.rs
index bf2316f7..63a8ae24 100644
--- a/grep-searcher/src/sink.rs
+++ b/grep-searcher/src/sink.rs
@@ -167,6 +167,28 @@ pub trait Sink {
Ok(true)
}
+ /// This method is called whenever binary detection is enabled and binary
+ /// data is found. If binary data is found, then this is called at least
+ /// once for the first occurrence with the absolute byte offset at which
+ /// the binary data begins.
+ ///
+ /// If this returns `true`, then searching continues. If this returns
+ /// `false`, then searching is stopped immediately and `finish` is called.
+ ///
+ /// If this returns an error, then searching is stopped immediately,
+ /// `finish` is not called and the error is bubbled back up to the caller
+ /// of the searcher.
+ ///
+ /// By default, it does nothing and returns `true`.
+ #[inline]
+ fn binary_data(
+ &mut self,
+ _searcher: &Searcher,
+ _binary_byte_offset: u64,
+ ) -> Result<bool, Self::Error> {
+ Ok(true)
+ }
+
/// This method is called when a search has begun, before any search is
/// executed. By default, this does nothing.
///
@@ -229,6 +251,15 @@ impl<'a, S: Sink> Sink for &'a mut S {
}
#[inline]
+ fn binary_data(
+ &mut self,
+ searcher: &Searcher,
+ binary_byte_offset: u64,
+ ) -> Result<bool, S::Error> {
+ (**self).binary_data(searcher, binary_byte_offset)
+ }
+
+ #[inline]
fn begin(
&mut self,
searcher: &Searcher,
@@ -276,6 +307,15 @@ impl<S: Sink + ?Sized> Sink for Box<S> {
}
#[inline]
+ fn binary_data(
+ &mut self,
+ searcher: &Searcher,
+ binary_byte_offset: u64,
+ ) -> Result<bool, S::Error> {
+ (**self).binary_data(searcher, binary_byte_offset)
+ }
+
+ #[inline]
fn begin(
&mut self,
searcher: &Searcher,