summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com> 2023-09-28 12:58:11 -0400
committer: Andrew Gallant <jamslam@gmail.com> 2023-10-09 20:29:52 -0400
commit: d53b7310ee1424a292228f7e8986fa9306f22a7f (patch)
tree: e4fb8f7bf0fd481cef181ad72736d80ef3670491
parent: e30bbb8cff1fc59c9e7671c0aca1e8c664bde7df (diff)
searcher: polish
This updates some dependencies and brings code style in line with my current practice.
-rw-r--r-- Cargo.lock 4
-rw-r--r-- crates/searcher/Cargo.toml 14
-rw-r--r-- crates/searcher/src/lib.rs 72
-rw-r--r-- crates/searcher/src/line_buffer.rs 55
-rw-r--r-- crates/searcher/src/lines.rs 32
-rw-r--r-- crates/searcher/src/searcher/core.rs 65
-rw-r--r-- crates/searcher/src/searcher/glue.rs 48
-rw-r--r-- crates/searcher/src/searcher/mmap.rs 3
-rw-r--r-- crates/searcher/src/searcher/mod.rs 72
-rw-r--r-- crates/searcher/src/sink.rs 66
-rw-r--r-- crates/searcher/src/testutil.rs 85
11 files changed, 269 insertions, 247 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 7be0552b..8e45d316 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -319,9 +319,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c"
[[package]]
name = "memmap2"
-version = "0.5.10"
+version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
+checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed"
dependencies = [
"libc",
]
diff --git a/crates/searcher/Cargo.toml b/crates/searcher/Cargo.toml
index 579d1370..5508bb0d 100644
--- a/crates/searcher/Cargo.toml
+++ b/crates/searcher/Cargo.toml
@@ -11,16 +11,16 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense OR MIT"
-edition = "2018"
+edition = "2021"
[dependencies]
-bstr = { version = "1.6.0", default-features = false, features = ["std"] }
-encoding_rs = "0.8.14"
-encoding_rs_io = "0.1.6"
+bstr = { version = "1.6.2", default-features = false, features = ["std"] }
+encoding_rs = "0.8.33"
+encoding_rs_io = "0.1.7"
grep-matcher = { version = "0.1.6", path = "../matcher" }
-log = "0.4.5"
-memchr = "2.6.2"
-memmap = { package = "memmap2", version = "0.5.3" }
+log = "0.4.20"
+memchr = "2.6.3"
+memmap = { package = "memmap2", version = "0.8.0" }
[dev-dependencies]
grep-regex = { version = "0.1.11", path = "../regex" }
diff --git a/crates/searcher/src/lib.rs b/crates/searcher/src/lib.rs
index 20d38ffe..4f5bd159 100644
--- a/crates/searcher/src/lib.rs
+++ b/crates/searcher/src/lib.rs
@@ -38,12 +38,12 @@ This example shows how to execute the searcher and read the search results
using the [`UTF8`](sinks::UTF8) implementation of `Sink`.
```
-use std::error::Error;
-
-use grep_matcher::Matcher;
-use grep_regex::RegexMatcher;
-use grep_searcher::Searcher;
-use grep_searcher::sinks::UTF8;
+use {
+ grep_matcher::Matcher,
+ grep_regex::RegexMatcher,
+ grep_searcher::Searcher,
+ grep_searcher::sinks::UTF8,
+};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -54,28 +54,26 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
-# fn main() { example().unwrap() }
-fn example() -> Result<(), Box<Error>> {
- let matcher = RegexMatcher::new(r"Doctor \w+")?;
- let mut matches: Vec<(u64, String)> = vec![];
- Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
- // We are guaranteed to find a match, so the unwrap is OK.
- let mymatch = matcher.find(line.as_bytes())?.unwrap();
- matches.push((lnum, line[mymatch].to_string()));
- Ok(true)
- }))?;
-
- assert_eq!(matches.len(), 2);
- assert_eq!(
- matches[0],
- (1, "Doctor Watsons".to_string())
- );
- assert_eq!(
- matches[1],
- (5, "Doctor Watson".to_string())
- );
- Ok(())
-}
+let matcher = RegexMatcher::new(r"Doctor \w+")?;
+let mut matches: Vec<(u64, String)> = vec![];
+Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
+ // We are guaranteed to find a match, so the unwrap is OK.
+ let mymatch = matcher.find(line.as_bytes())?.unwrap();
+ matches.push((lnum, line[mymatch].to_string()));
+ Ok(true)
+}))?;
+
+assert_eq!(matches.len(), 2);
+assert_eq!(
+ matches[0],
+ (1, "Doctor Watsons".to_string())
+);
+assert_eq!(
+ matches[1],
+ (5, "Doctor Watson".to_string())
+);
+
+# Ok::<(), Box<dyn std::error::Error>>(())
```
See also `examples/search-stdin.rs` from the root of this crate's directory
@@ -85,14 +83,16 @@ searches stdin.
#![deny(missing_docs)]
-pub use crate::lines::{LineIter, LineStep};
-pub use crate::searcher::{
- BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
- SearcherBuilder,
-};
-pub use crate::sink::sinks;
-pub use crate::sink::{
- Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
+pub use crate::{
+ lines::{LineIter, LineStep},
+ searcher::{
+ BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
+ SearcherBuilder,
+ },
+ sink::{
+ sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
+ SinkMatch,
+ },
};
#[macro_use]
diff --git a/crates/searcher/src/line_buffer.rs b/crates/searcher/src/line_buffer.rs
index aaa81d26..a09d4e17 100644
--- a/crates/searcher/src/line_buffer.rs
+++ b/crates/searcher/src/line_buffer.rs
@@ -1,4 +1,3 @@
-use std::cmp;
use std::io;
use bstr::ByteSlice;
@@ -15,7 +14,7 @@ pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
///
/// The default is to eagerly allocate without a limit.
#[derive(Clone, Copy, Debug)]
-pub enum BufferAllocation {
+pub(crate) enum BufferAllocation {
/// Attempt to expand the size of the buffer until either at least the next
/// line fits into memory or until all available memory is exhausted.
///
@@ -35,7 +34,7 @@ impl Default for BufferAllocation {
/// Create a new error to be used when a configured allocation limit has been
/// reached.
-pub fn alloc_error(limit: usize) -> io::Error {
+pub(crate) fn alloc_error(limit: usize) -> io::Error {
let msg = format!("configured allocation limit ({}) exceeded", limit);
io::Error::new(io::ErrorKind::Other, msg)
}
@@ -49,7 +48,7 @@ pub fn alloc_error(limit: usize) -> io::Error {
/// using textual patterns. Of course, there are many cases in which this isn't
/// true, which is why binary detection is disabled by default.
#[derive(Clone, Copy, Debug)]
-pub enum BinaryDetection {
+pub(crate) enum BinaryDetection {
/// No binary detection is performed. Data reported by the line buffer may
/// contain arbitrary bytes.
None,
@@ -108,18 +107,18 @@ impl Default for Config {
/// A builder for constructing line buffers.
#[derive(Clone, Debug, Default)]
-pub struct LineBufferBuilder {
+pub(crate) struct LineBufferBuilder {
config: Config,
}
impl LineBufferBuilder {
/// Create a new builder for a buffer.
- pub fn new() -> LineBufferBuilder {
+ pub(crate) fn new() -> LineBufferBuilder {
LineBufferBuilder { config: Config::default() }
}
/// Create a new line buffer from this builder's configuration.
- pub fn build(&self) -> LineBuffer {
+ pub(crate) fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config,
buf: vec![0; self.config.capacity],
@@ -139,7 +138,10 @@ impl LineBufferBuilder {
///
/// This is set to a reasonable default and probably shouldn't be changed
/// unless there's a specific reason to do so.
- pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder {
+ pub(crate) fn capacity(
+ &mut self,
+ capacity: usize,
+ ) -> &mut LineBufferBuilder {
self.config.capacity = capacity;
self
}
@@ -155,7 +157,10 @@ impl LineBufferBuilder {
/// is incomplete.
///
/// By default, this is set to `b'\n'`.
- pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder {
+ pub(crate) fn line_terminator(
+ &mut self,
+ lineterm: u8,
+ ) -> &mut LineBufferBuilder {
self.config.lineterm = lineterm;
self
}
@@ -174,7 +179,7 @@ impl LineBufferBuilder {
/// a value of `0` is sensible, and in particular, will guarantee that a
/// line buffer will never allocate additional memory beyond its initial
/// capacity.
- pub fn buffer_alloc(
+ pub(crate) fn buffer_alloc(
&mut self,
behavior: BufferAllocation,
) -> &mut LineBufferBuilder {
@@ -188,7 +193,7 @@ impl LineBufferBuilder {
///
/// By default, this is disabled. In general, binary detection should be
/// viewed as an imperfect heuristic.
- pub fn binary_detection(
+ pub(crate) fn binary_detection(
&mut self,
detection: BinaryDetection,
) -> &mut LineBufferBuilder {
@@ -200,7 +205,7 @@ impl LineBufferBuilder {
/// A line buffer reader efficiently reads a line oriented buffer from an
/// arbitrary reader.
#[derive(Debug)]
-pub struct LineBufferReader<'b, R> {
+pub(crate) struct LineBufferReader<'b, R> {
rdr: R,
line_buffer: &'b mut LineBuffer,
}
@@ -211,7 +216,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
///
/// This does not change the binary detection behavior of the given line
/// buffer.
- pub fn new(
+ pub(crate) fn new(
rdr: R,
line_buffer: &'b mut LineBuffer,
) -> LineBufferReader<'b, R> {
@@ -225,13 +230,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// correspond to an offset in memory. It is typically used for reporting
/// purposes. It can also be used for counting the number of bytes that
/// have been searched.
- pub fn absolute_byte_offset(&self) -> u64 {
+ pub(crate) fn absolute_byte_offset(&self) -> u64 {
self.line_buffer.absolute_byte_offset()
}
/// If binary data was detected, then this returns the absolute byte offset
/// at which binary data was initially found.
- pub fn binary_byte_offset(&self) -> Option<u64> {
+ pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.line_buffer.binary_byte_offset()
}
@@ -248,25 +253,25 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// This forwards any errors returned by the underlying reader, and will
/// also return an error if the buffer must be expanded past its allocation
/// limit, as governed by the buffer allocation strategy.
- pub fn fill(&mut self) -> Result<bool, io::Error> {
+ pub(crate) fn fill(&mut self) -> Result<bool, io::Error> {
self.line_buffer.fill(&mut self.rdr)
}
/// Return the contents of this buffer.
- pub fn buffer(&self) -> &[u8] {
+ pub(crate) fn buffer(&self) -> &[u8] {
self.line_buffer.buffer()
}
/// Return the buffer as a BStr, used for convenient equality checking
/// in tests only.
#[cfg(test)]
- fn bstr(&self) -> &::bstr::BStr {
+ fn bstr(&self) -> &bstr::BStr {
self.buffer().as_bstr()
}
/// Consume the number of bytes provided. This must be less than or equal
/// to the number of bytes returned by `buffer`.
- pub fn consume(&mut self, amt: usize) {
+ pub(crate) fn consume(&mut self, amt: usize) {
self.line_buffer.consume(amt);
}
@@ -286,7 +291,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
/// Line buffers cannot be used directly, but instead must be used via the
/// LineBufferReader.
#[derive(Clone, Debug)]
-pub struct LineBuffer {
+pub(crate) struct LineBuffer {
/// The configuration of this buffer.
config: Config,
/// The primary buffer with which to hold data.
@@ -322,7 +327,7 @@ impl LineBuffer {
///
/// This permits dynamically changing the binary detection strategy on
/// an existing line buffer without needing to create a new one.
- pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
+ pub(crate) fn set_binary_detection(&mut self, binary: BinaryDetection) {
self.config.binary = binary;
}
@@ -497,12 +502,12 @@ impl LineBuffer {
}
// `len` is used for computing the next allocation size. The capacity
// is permitted to start at `0`, so we make sure it's at least `1`.
- let len = cmp::max(1, self.buf.len());
+ let len = std::cmp::max(1, self.buf.len());
let additional = match self.config.buffer_alloc {
BufferAllocation::Eager => len * 2,
BufferAllocation::Error(limit) => {
let used = self.buf.len() - self.config.capacity;
- let n = cmp::min(len * 2, limit - used);
+ let n = std::cmp::min(len * 2, limit - used);
if n == 0 {
return Err(alloc_error(self.config.capacity + limit));
}
@@ -541,9 +546,9 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)]
mod tests {
- use super::*;
use bstr::{ByteSlice, ByteVec};
- use std::str;
+
+ use super::*;
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
diff --git a/crates/searcher/src/lines.rs b/crates/searcher/src/lines.rs
index 5e47c9b3..98f54fa4 100644
--- a/crates/searcher/src/lines.rs
+++ b/crates/searcher/src/lines.rs
@@ -2,8 +2,10 @@
A collection of routines for performing operations on lines.
*/
-use bstr::ByteSlice;
-use grep_matcher::{LineTerminator, Match};
+use {
+ bstr::ByteSlice,
+ grep_matcher::{LineTerminator, Match},
+};
/// An iterator over lines in a particular slice of bytes.
///
@@ -21,10 +23,8 @@ impl<'b> LineIter<'b> {
/// Create a new line iterator that yields lines in the given bytes that
/// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
- LineIter {
- bytes: bytes,
- stepper: LineStep::new(line_term, 0, bytes.len()),
- }
+ let stepper = LineStep::new(line_term, 0, bytes.len());
+ LineIter { bytes, stepper }
}
}
@@ -61,7 +61,7 @@ impl LineStep {
///
/// This panics if `start` is not less than or equal to `end`.
pub fn new(line_term: u8, start: usize, end: usize) -> LineStep {
- LineStep { line_term, pos: start, end: end }
+ LineStep { line_term, pos: start, end }
}
/// Return the start and end position of the next line in the given bytes.
@@ -108,14 +108,17 @@ impl LineStep {
}
/// Count the number of occurrences of `line_term` in `bytes`.
-pub fn count(bytes: &[u8], line_term: u8) -> u64 {
+pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 {
memchr::memchr_iter(line_term, bytes).count() as u64
}
/// Given a line that possibly ends with a terminator, return that line without
/// the terminator.
#[inline(always)]
-pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
+pub(crate) fn without_terminator(
+ bytes: &[u8],
+ line_term: LineTerminator,
+) -> &[u8] {
let line_term = line_term.as_bytes();
let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) {
@@ -129,7 +132,7 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
///
/// Line terminators are considered part of the line they terminate.
#[inline(always)]
-pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
+pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
let line_start =
bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1);
let line_end =
@@ -151,7 +154,7 @@ pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match {
///
/// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line.
-pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
+pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count)
}
@@ -195,10 +198,9 @@ fn preceding_by_pos(
#[cfg(test)]
mod tests {
- use super::*;
use grep_matcher::Match;
- use std::ops::Range;
- use std::str;
+
+ use super::*;
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -222,7 +224,7 @@ and exhibited clearly, with a label attached.\
results
}
- fn line_ranges(text: &str) -> Vec<Range<usize>> {
+ fn line_ranges(text: &str) -> Vec<std::ops::Range<usize>> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
while let Some(m) = it.next_match(text.as_bytes()) {
diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs
index 95b4ba6a..e6836e6a 100644
--- a/crates/searcher/src/searcher/core.rs
+++ b/crates/searcher/src/searcher/core.rs
@@ -1,15 +1,16 @@
-use std::cmp;
-
use bstr::ByteSlice;
-use crate::line_buffer::BinaryDetection;
-use crate::lines::{self, LineStep};
-use crate::searcher::{Config, Range, Searcher};
-use crate::sink::{
- Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
-};
use grep_matcher::{LineMatchKind, Matcher};
+use crate::{
+ line_buffer::BinaryDetection,
+ lines::{self, LineStep},
+ searcher::{Config, Range, Searcher},
+ sink::{
+ Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
+ },
+};
+
enum FastMatchResult {
Continue,
Stop,
@@ -17,7 +18,7 @@ enum FastMatchResult {
}
#[derive(Debug)]
-pub struct Core<'s, M: 's, S> {
+pub(crate) struct Core<'s, M: 's, S> {
config: &'s Config,
matcher: M,
searcher: &'s Searcher,
@@ -35,7 +36,7 @@ pub struct Core<'s, M: 's, S> {
}
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
- pub fn new(
+ pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
sink: S,
@@ -45,14 +46,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if searcher.config.line_number { Some(1) } else { None };
let core = Core {
config: &searcher.config,
- matcher: matcher,
- searcher: searcher,
- sink: sink,
- binary: binary,
+ matcher,
+ searcher,
+ sink,
+ binary,
pos: 0,
absolute_byte_offset: 0,
binary_byte_offset: None,
- line_number: line_number,
+ line_number,
last_line_counted: 0,
last_line_visited: 0,
after_context_left: 0,
@@ -69,23 +70,23 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
core
}
- pub fn pos(&self) -> usize {
+ pub(crate) fn pos(&self) -> usize {
self.pos
}
- pub fn set_pos(&mut self, pos: usize) {
+ pub(crate) fn set_pos(&mut self, pos: usize) {
self.pos = pos;
}
- pub fn binary_byte_offset(&self) -> Option<u64> {
+ pub(crate) fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset.map(|offset| offset as u64)
}
- pub fn matcher(&self) -> &M {
+ pub(crate) fn matcher(&self) -> &M {
&self.matcher
}
- pub fn matched(
+ pub(crate) fn matched(
&mut self,
buf: &[u8],
range: &Range,
@@ -93,18 +94,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.sink_matched(buf, range)
}
- pub fn binary_data(
+ pub(crate) fn binary_data(
&mut self,
binary_byte_offset: u64,
) -> Result<bool, S::Error> {
self.sink.binary_data(&self.searcher, binary_byte_offset)
}
- pub fn begin(&mut self) -> Result<bool, S::Error> {
+ pub(crate) fn begin(&mut self) -> Result<bool, S::Error> {
self.sink.begin(&self.searcher)
}
- pub fn finish(
+ pub(crate) fn finish(
&mut self,
byte_count: u64,
binary_byte_offset: Option<u64>,
@@ -115,7 +116,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
)
}
- pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ pub(crate) fn match_by_line(
+ &mut self,
+ buf: &[u8],
+ ) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
@@ -127,7 +131,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
- pub fn roll(&mut self, buf: &[u8]) -> usize {
+ pub(crate) fn roll(&mut self, buf: &[u8]) -> usize {
let consumed = if self.config.max_context() == 0 {
buf.len()
} else {
@@ -141,7 +145,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
self.config.line_term.as_byte(),
self.config.max_context(),
);
- let consumed = cmp::max(context_start, self.last_line_visited);
+ let consumed =
+ std::cmp::max(context_start, self.last_line_visited);
consumed
};
self.count_lines(buf, consumed);
@@ -152,7 +157,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
- pub fn detect_binary(
+ pub(crate) fn detect_binary(
&mut self,
buf: &[u8],
range: &Range,
@@ -177,7 +182,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
- pub fn before_context_by_line(
+ pub(crate) fn before_context_by_line(
&mut self,
buf: &[u8],
upto: usize,
@@ -213,7 +218,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- pub fn after_context_by_line(
+ pub(crate) fn after_context_by_line(
&mut self,
buf: &[u8],
upto: usize,
@@ -238,7 +243,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- pub fn other_context_by_line(
+ pub(crate) fn other_context_by_line(
&mut self,
buf: &[u8],
upto: usize,
diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs
index 217c70e4..006afad3 100644
--- a/crates/searcher/src/searcher/glue.rs
+++ b/crates/searcher/src/searcher/glue.rs
@@ -1,16 +1,14 @@
-use std::cmp;
-use std::io;
-
-use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
-use crate::lines::{self, LineStep};
-use crate::sink::{Sink, SinkError};
use grep_matcher::Matcher;
-use crate::searcher::core::Core;
-use crate::searcher::{Config, Range, Searcher};
+use crate::{
+ line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY},
+ lines::{self, LineStep},
+ searcher::{core::Core, Config, Range, Searcher},
+ sink::{Sink, SinkError},
+};
#[derive(Debug)]
-pub struct ReadByLine<'s, M, R, S> {
+pub(crate) struct ReadByLine<'s, M, R, S> {
config: &'s Config,
core: Core<'s, M, S>,
rdr: LineBufferReader<'s, R>,
@@ -19,10 +17,10 @@ pub struct ReadByLine<'s, M, R, S> {
impl<'s, M, R, S> ReadByLine<'s, M, R, S>
where
M: Matcher,
- R: io::Read,
+ R: std::io::Read,
S: Sink,
{
- pub fn new(
+ pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
read_from: LineBufferReader<'s, R>,
@@ -37,7 +35,7 @@ where
}
}
- pub fn run(mut self) -> Result<(), S::Error> {
+ pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
while self.fill()? && self.core.match_by_line(self.rdr.buffer())? {
}
@@ -87,13 +85,13 @@ where
}
#[derive(Debug)]
-pub struct SliceByLine<'s, M, S> {
+pub(crate) struct SliceByLine<'s, M, S> {
core: Core<'s, M, S>,
slice: &'s [u8],
}
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
- pub fn new(
+ pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
slice: &'s [u8],
@@ -103,14 +101,14 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
SliceByLine {
core: Core::new(searcher, matcher, write_to, true),
- slice: slice,
+ slice,
}
}
- pub fn run(mut self) -> Result<(), S::Error> {
+ pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
let binary_upto =
- cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
+ std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? {
while !self.slice[self.core.pos()..].is_empty()
@@ -132,7 +130,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
}
#[derive(Debug)]
-pub struct MultiLine<'s, M, S> {
+pub(crate) struct MultiLine<'s, M, S> {
config: &'s Config,
core: Core<'s, M, S>,
slice: &'s [u8],
@@ -140,7 +138,7 @@ pub struct MultiLine<'s, M, S> {
}
impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
- pub fn new(
+ pub(crate) fn new(
searcher: &'s Searcher,
matcher: M,
slice: &'s [u8],
@@ -151,15 +149,15 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
MultiLine {
config: &searcher.config,
core: Core::new(searcher, matcher, write_to, true),
- slice: slice,
+ slice,
last_match: None,
}
}
- pub fn run(mut self) -> Result<(), S::Error> {
+ pub(crate) fn run(mut self) -> Result<(), S::Error> {
if self.core.begin()? {
let binary_upto =
- cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
+ std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY);
let binary_range = Range::new(0, binary_upto);
if !self.core.detect_binary(self.slice, &binary_range)? {
let mut keepgoing = true;
@@ -347,8 +345,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
#[cfg(test)]
mod tests {
- use crate::searcher::{BinaryDetection, SearcherBuilder};
- use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester};
+ use crate::{
+ searcher::{BinaryDetection, SearcherBuilder},
+ testutil::{KitchenSink, RegexMatcher, SearcherTester},
+ };
use super::*;
diff --git a/crates/searcher/src/searcher/mmap.rs b/crates/searcher/src/searcher/mmap.rs
index 0ab2d53f..3774a621 100644
--- a/crates/searcher/src/searcher/mmap.rs
+++ b/crates/searcher/src/searcher/mmap.rs
@@ -1,5 +1,4 @@
-use std::fs::File;
-use std::path::Path;
+use std::{fs::File, path::Path};
use memmap::Mmap;
diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs
index b6b8f38f..abbc0209 100644
--- a/crates/searcher/src/searcher/mod.rs
+++ b/crates/searcher/src/searcher/mod.rs
@@ -1,19 +1,25 @@
-use std::cell::RefCell;
-use std::cmp;
-use std::fmt;
-use std::fs::File;
-use std::io::{self, Read};
-use std::path::Path;
-
-use crate::line_buffer::{
- self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
- LineBufferReader, DEFAULT_BUFFER_CAPACITY,
+use std::{
+ cell::RefCell,
+ cmp,
+ fs::File,
+ io::{self, Read},
+ path::Path,
+};
+
+use {
+ encoding_rs,
+ encoding_rs_io::DecodeReaderBytesBuilder,
+ grep_matcher::{LineTerminator, Match, Matcher},
+};
+
+use crate::{
+ line_buffer::{
+ self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
+ LineBufferReader, DEFAULT_BUFFER_CAPACITY,
+ },
+ searcher::glue::{MultiLine, ReadByLine, SliceByLine},
+ sink::{Sink, SinkError},
};
-use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
-use crate::sink::{Sink, SinkError};
-use encoding_rs;
-use encoding_rs_io::DecodeReaderBytesBuilder;
-use grep_matcher::{LineTerminator, Match, Matcher};
pub use self::mmap::MmapChoice;
@@ -232,6 +238,7 @@ impl Config {
/// This error occurs when a non-sensical configuration is present when trying
/// to construct a `Searcher` from a `SearcherBuilder`.
#[derive(Clone, Debug, Eq, PartialEq)]
+#[non_exhaustive]
pub enum ConfigError {
/// Indicates that the heap limit configuration prevents all possible
/// search strategies from being used. For example, if the heap limit is
@@ -250,23 +257,12 @@ pub enum ConfigError {
/// The provided encoding label that could not be found.
label: Vec<u8>,
},
- /// Hints that destructuring should not be exhaustive.
- ///
- /// This enum may grow additional variants, so this makes sure clients
- /// don't count on exhaustive matching. (Otherwise, adding a new variant
- /// could break existing code.)
- #[doc(hidden)]
- __Nonexhaustive,
}
-impl ::std::error::Error for ConfigError {
- fn description(&self) -> &str {
- "grep-searcher configuration error"
- }
-}
+impl std::error::Error for ConfigError {}
-impl fmt::Display for ConfigError {
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::fmt::Display for ConfigError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
ConfigError::SearchUnavailable => {
write!(f, "grep config error: no available searchers")
@@ -284,7 +280,6 @@ impl fmt::Display for ConfigError {
"grep config error: unknown encoding: {}",
String::from_utf8_lossy(label),
),
- _ => panic!("BUG: unexpected variant found"),
}
}
}
@@ -331,8 +326,8 @@ impl SearcherBuilder {
.bom_sniffing(self.config.bom_sniffing);
Searcher {
- config: config,
- decode_builder: decode_builder,
+ config,
+ decode_builder,
decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]),
line_buffer: RefCell::new(self.config.line_buffer()),
multi_line_buffer: RefCell::new(vec![]),
@@ -676,9 +671,9 @@ impl Searcher {
log::trace!("{:?}: searching via memory map", path);
return self.search_slice(matcher, &mmap, write_to);
}
- // Fast path for multi-line searches of files when memory maps are
- // not enabled. This pre-allocates a buffer roughly the size of the
- // file, which isn't possible when searching an arbitrary io::Read.
+ // Fast path for multi-line searches of files when memory maps are not
+ // enabled. This pre-allocates a buffer roughly the size of the file,
+ // which isn't possible when searching an arbitrary std::io::Read.
if self.multi_line_with_matcher(&matcher) {
log::trace!(
"{:?}: reading entire file on to heap for mulitline",
@@ -699,8 +694,8 @@ impl Searcher {
}
}
- /// Execute a search over any implementation of `io::Read` and write the
- /// results to the given sink.
+ /// Execute a search over any implementation of `std::io::Read` and write
+ /// the results to the given sink.
///
/// When possible, this implementation will search the reader incrementally
/// without reading it into memory. In some cases---for example, if multi
@@ -1016,9 +1011,10 @@ fn slice_has_bom(slice: &[u8]) -> bool {
#[cfg(test)]
mod tests {
- use super::*;
use crate::testutil::{KitchenSink, RegexMatcher};
+ use super::*;
+
#[test]
fn config_error_heap_limit() {
let matcher = RegexMatcher::new("");
diff --git a/crates/searcher/src/sink.rs b/crates/searcher/src/sink.rs
index 8621e73a..67d68987 100644
--- a/crates/searcher/src/sink.rs
+++ b/crates/searcher/src/sink.rs
@@ -1,23 +1,24 @@
-use std::error;
-use std::fmt;
use std::io;
use grep_matcher::LineTerminator;
-use crate::lines::LineIter;
-use crate::searcher::{ConfigError, Searcher};
+use crate::{
+ lines::