diff options
Diffstat (limited to 'grep-searcher/src')
-rw-r--r-- | grep-searcher/src/line_buffer.rs | 8 | ||||
-rw-r--r-- | grep-searcher/src/searcher/core.rs | 36 | ||||
-rw-r--r-- | grep-searcher/src/searcher/glue.rs | 19 | ||||
-rw-r--r-- | grep-searcher/src/searcher/mod.rs | 62 | ||||
-rw-r--r-- | grep-searcher/src/sink.rs | 40 |
5 files changed, 135 insertions, 30 deletions
diff --git a/grep-searcher/src/line_buffer.rs b/grep-searcher/src/line_buffer.rs index c2e54a9e..cc7dd578 100644 --- a/grep-searcher/src/line_buffer.rs +++ b/grep-searcher/src/line_buffer.rs @@ -317,6 +317,14 @@ pub struct LineBuffer { } impl LineBuffer { + /// Set the binary detection method used on this line buffer. + /// + /// This permits dynamically changing the binary detection strategy on + /// an existing line buffer without needing to create a new one. + pub fn set_binary_detection(&mut self, binary: BinaryDetection) { + self.config.binary = binary; + } + /// Reset this buffer, such that it can be used with a new reader. fn clear(&mut self) { self.pos = 0; diff --git a/grep-searcher/src/searcher/core.rs b/grep-searcher/src/searcher/core.rs index ff2cd18d..dd621bba 100644 --- a/grep-searcher/src/searcher/core.rs +++ b/grep-searcher/src/searcher/core.rs @@ -90,6 +90,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.sink_matched(buf, range) } + pub fn binary_data( + &mut self, + binary_byte_offset: u64, + ) -> Result<bool, S::Error> { + self.sink.binary_data(&self.searcher, binary_byte_offset) + } + pub fn begin(&mut self) -> Result<bool, S::Error> { self.sink.begin(&self.searcher) } @@ -141,19 +148,28 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { consumed } - pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool { + pub fn detect_binary( + &mut self, + buf: &[u8], + range: &Range, + ) -> Result<bool, S::Error> { if self.binary_byte_offset.is_some() { - return true; + return Ok(self.config.binary.quit_byte().is_some()); } let binary_byte = match self.config.binary.0 { BinaryDetection::Quit(b) => b, - _ => return false, + BinaryDetection::Convert(b) => b, + _ => return Ok(false), }; if let Some(i) = B(&buf[*range]).find_byte(binary_byte) { - self.binary_byte_offset = Some(range.start() + i); - true + let offset = range.start() + i; + self.binary_byte_offset = Some(offset); + if !self.binary_data(offset as u64)? { + return Ok(true); + } + Ok(self.config.binary.quit_byte().is_some()) } else { - false + Ok(false) } } @@ -416,7 +432,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { buf: &[u8], range: &Range, ) -> Result<bool, S::Error> { - if self.binary && self.detect_binary(buf, range) { + if self.binary && self.detect_binary(buf, range)? { return Ok(false); } if !self.sink_break_context(range.start())? { @@ -448,7 +464,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { buf: &[u8], range: &Range, ) -> Result<bool, S::Error> { - if self.binary && self.detect_binary(buf, range) { + if self.binary && self.detect_binary(buf, range)? { return Ok(false); } self.count_lines(buf, range.start()); @@ -478,7 +494,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { ) -> Result<bool, S::Error> { assert!(self.after_context_left >= 1); - if self.binary && self.detect_binary(buf, range) { + if self.binary && self.detect_binary(buf, range)? { return Ok(false); } self.count_lines(buf, range.start()); @@ -507,7 +523,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { buf: &[u8], range: &Range, ) -> Result<bool, S::Error> { - if self.binary && self.detect_binary(buf, range) { + if self.binary && self.detect_binary(buf, range)? { return Ok(false); } self.count_lines(buf, range.start()); diff --git a/grep-searcher/src/searcher/glue.rs b/grep-searcher/src/searcher/glue.rs index 3a5d4291..4f362dab 100644 --- a/grep-searcher/src/searcher/glue.rs +++ b/grep-searcher/src/searcher/glue.rs @@ -51,6 +51,7 @@ where M: Matcher, fn fill(&mut self) -> Result<bool, S::Error> { assert!(self.rdr.buffer()[self.core.pos()..].is_empty()); + let already_binary = self.rdr.binary_byte_offset().is_some(); let old_buf_len = self.rdr.buffer().len(); let consumed = self.core.roll(self.rdr.buffer()); self.rdr.consume(consumed); @@ -58,7 +59,14 @@ where M: Matcher, Err(err) => return Err(S::Error::error_io(err)), Ok(didread) => didread, }; - if !didread || self.rdr.binary_byte_offset().is_some() { + if !already_binary { + if let Some(offset) = self.rdr.binary_byte_offset() { + if !self.core.binary_data(offset)? { + return Ok(false); + } + } + } + if !didread || self.should_binary_quit() { return Ok(false); } // If rolling the buffer didn't result in consuming anything and if @@ -71,6 +79,11 @@ where M: Matcher, } Ok(true) } + + fn should_binary_quit(&self) -> bool { + self.rdr.binary_byte_offset().is_some() + && self.config.binary.quit_byte().is_some() + } } #[derive(Debug)] @@ -103,7 +116,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { DEFAULT_BUFFER_CAPACITY, ); let binary_range = Range::new(0, binary_upto); - if !self.core.detect_binary(self.slice, &binary_range) { + if !self.core.detect_binary(self.slice, &binary_range)? { while !self.slice[self.core.pos()..].is_empty() && self.core.match_by_line(self.slice)? @@ -155,7 +168,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { DEFAULT_BUFFER_CAPACITY, ); let binary_range = Range::new(0, binary_upto); - if !self.core.detect_binary(self.slice, &binary_range) { + if !self.core.detect_binary(self.slice, &binary_range)? { let mut keepgoing = true; while !self.slice[self.core.pos()..].is_empty() && keepgoing { keepgoing = self.sink()?; diff --git a/grep-searcher/src/searcher/mod.rs b/grep-searcher/src/searcher/mod.rs index 729b491b..e20e04a3 100644 --- a/grep-searcher/src/searcher/mod.rs +++ b/grep-searcher/src/searcher/mod.rs @@ -75,25 +75,41 @@ impl BinaryDetection { BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte)) } - // TODO(burntsushi): Figure out how to make binary conversion work. This - // permits implementing GNU grep's default behavior, which is to zap NUL - // bytes but still execute a search (if a match is detected, then GNU grep - // stops and reports that a match was found but doesn't print the matching - // line itself). - // - // This behavior is pretty simple to implement using the line buffer (and - // in fact, it is already implemented and tested), since there's a fixed - // size buffer that we can easily write to. The issue arises when searching - // a `&[u8]` (whether on the heap or via a memory map), since this isn't - // something we can easily write to. - - /// The given byte is searched in all contents read by the line buffer. If - /// it occurs, then it is replaced by the line terminator. The line buffer - /// guarantees that this byte will never be observable by callers. - #[allow(dead_code)] - fn convert(binary_byte: u8) -> BinaryDetection { + /// Binary detection is performed by looking for the given byte, and + /// replacing it with the line terminator configured on the searcher. + /// (If the searcher is configured to use `CRLF` as the line terminator, + /// then this byte is replaced by just `LF`.) + /// + /// When searching is performed using a fixed size buffer, then the + /// contents of that buffer are always searched for the presence of this + /// byte and replaced with the line terminator. In effect, the caller is + /// guaranteed to never observe this byte while searching. + /// + /// When searching is performed with the entire contents mapped into + /// memory, then this setting has no effect and is ignored. + pub fn convert(binary_byte: u8) -> BinaryDetection { BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte)) } + + /// If this binary detection uses the "quit" strategy, then this returns + /// the byte that will cause a search to quit. In any other case, this + /// returns `None`. + pub fn quit_byte(&self) -> Option<u8> { + match self.0 { + line_buffer::BinaryDetection::Quit(b) => Some(b), + _ => None, + } + } + + /// If this binary detection uses the "convert" strategy, then this returns + /// the byte that will be replaced by the line terminator. In any other + /// case, this returns `None`. + pub fn convert_byte(&self) -> Option<u8> { + match self.0 { + line_buffer::BinaryDetection::Convert(b) => Some(b), + _ => None, + } + } } /// An encoding to use when searching. @@ -739,6 +755,12 @@ impl Searcher { } } + /// Set the binary detection method used on this searcher. + pub fn set_binary_detection(&mut self, detection: BinaryDetection) { + self.config.binary = detection.clone(); + self.line_buffer.borrow_mut().set_binary_detection(detection.0); + } + /// Check that the searcher's configuration and the matcher are consistent /// with each other. fn check_config<M: Matcher>(&self, matcher: M) -> Result<(), ConfigError> { @@ -778,6 +800,12 @@ impl Searcher { self.config.line_term } + /// Returns the type of binary detection configured on this searcher. + #[inline] + pub fn binary_detection(&self) -> &BinaryDetection { + &self.config.binary + } + /// Returns true if and only if this searcher is configured to invert its /// search results. That is, matching lines are lines that do **not** match /// the searcher's matcher. diff --git a/grep-searcher/src/sink.rs b/grep-searcher/src/sink.rs index bf2316f7..63a8ae24 100644 --- a/grep-searcher/src/sink.rs +++ b/grep-searcher/src/sink.rs @@ -167,6 +167,28 @@ pub trait Sink { Ok(true) } + /// This method is called whenever binary detection is enabled and binary + /// data is found. If binary data is found, then this is called at least + /// once for the first occurrence with the absolute byte offset at which + /// the binary data begins. + /// + /// If this returns `true`, then searching continues. If this returns + /// `false`, then searching is stopped immediately and `finish` is called. + /// + /// If this returns an error, then searching is stopped immediately, + /// `finish` is not called and the error is bubbled back up to the caller + /// of the searcher. + /// + /// By default, it does nothing and returns `true`. + #[inline] + fn binary_data( + &mut self, + _searcher: &Searcher, + _binary_byte_offset: u64, + ) -> Result<bool, Self::Error> { + Ok(true) + } + /// This method is called when a search has begun, before any search is /// executed. By default, this does nothing. /// @@ -229,6 +251,15 @@ impl<'a, S: Sink> Sink for &'a mut S { } #[inline] + fn binary_data( + &mut self, + searcher: &Searcher, + binary_byte_offset: u64, + ) -> Result<bool, S::Error> { + (**self).binary_data(searcher, binary_byte_offset) + } + + #[inline] fn begin( &mut self, searcher: &Searcher, @@ -276,6 +307,15 @@ impl<S: Sink + ?Sized> Sink for Box<S> { } #[inline] + fn binary_data( + &mut self, + searcher: &Searcher, + binary_byte_offset: u64, + ) -> Result<bool, S::Error> { + (**self).binary_data(searcher, binary_byte_offset) + } + + #[inline] fn begin( &mut self, searcher: &Searcher, |