summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2019-01-20 12:32:09 -0500
committerAndrew Gallant <jamslam@gmail.com>2019-01-20 12:32:09 -0500
commit4b88e08f418d4c24223879a727efa5ad0c3c9058 (patch)
tree213f92d921b28d3d1841742085810005b9cf7bfd
parent7cbc535d70a53c81dfa3e58552c01f21c2e38d28 (diff)
search: migrate to bstr (ag/bstr-migration)
This is an initial attempt at migrating grep-searcher to use the new bstr crate (not yet published). This is mostly an improvement, although a significant problem is that the grep-matcher crate controls the `Index` impls for the `Match` type, which we use quite heavily. Thus, in order to impl `Index` for `BStr`, we need to add bstr as a public dependency to grep-matcher. This is really bad news because grep-matcher is supposed to be a light-weight core crate that defines a matcher interface, which is itself intended to be a public dependency. Thus, a semver bump on bstr will have very undesirable ripple effects throughout ripgrep's library crates. This would be something we could stomach if bstr was solid at 1.0 and committed to avoiding breaking changes. But it's not there yet.
-rw-r--r--Cargo.lock11
-rw-r--r--grep-matcher/Cargo.toml7
-rw-r--r--grep-matcher/src/interpolate.rs4
-rw-r--r--grep-matcher/src/lib.rs20
-rw-r--r--grep-searcher/Cargo.toml7
-rw-r--r--grep-searcher/src/lib.rs2
-rw-r--r--grep-searcher/src/line_buffer.rs121
-rw-r--r--grep-searcher/src/lines.rs61
-rw-r--r--grep-searcher/src/searcher/core.rs50
-rw-r--r--grep-searcher/src/searcher/glue.rs13
-rw-r--r--grep-searcher/src/searcher/mod.rs25
-rw-r--r--grep-searcher/src/testutil.rs6
12 files changed, 169 insertions, 158 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8f8965d3..20447b8b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -35,6 +35,13 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "bstr"
+version = "0.0.1"
+dependencies = [
+ "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "bytecount"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -180,7 +187,7 @@ dependencies = [
name = "grep-matcher"
version = "0.1.1"
dependencies = [
- "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bstr 0.0.1",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -222,13 +229,13 @@ dependencies = [
name = "grep-searcher"
version = "0.1.1"
dependencies = [
+ "bstr 0.0.1",
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1",
"grep-regex 0.1.1",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
- "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
diff --git a/grep-matcher/Cargo.toml b/grep-matcher/Cargo.toml
index 39391291..d65ee505 100644
--- a/grep-matcher/Cargo.toml
+++ b/grep-matcher/Cargo.toml
@@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT"
autotests = false
-[dependencies]
-memchr = "2.1"
+[dependencies.bstr]
+version = "*"
+path = "/home/andrew/rust/bstr"
+default-features = false
+features = ["std"]
[dev-dependencies]
regex = "1.1"
diff --git a/grep-matcher/src/interpolate.rs b/grep-matcher/src/interpolate.rs
index 168dd343..126ce521 100644
--- a/grep-matcher/src/interpolate.rs
+++ b/grep-matcher/src/interpolate.rs
@@ -1,6 +1,6 @@
use std::str;
-use memchr::memchr;
+use bstr::B;
/// Interpolate capture references in `replacement` and write the interpolation
/// result to `dst`. References in `replacement` take the form of $N or $name,
@@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
N: FnMut(&str) -> Option<usize>
{
while !replacement.is_empty() {
- match memchr(b'$', replacement) {
+ match B(replacement).find_byte(b'$') {
None => break,
Some(i) => {
dst.extend(&replacement[..i]);
diff --git a/grep-matcher/src/lib.rs b/grep-matcher/src/lib.rs
index 9a067efa..ba59b923 100644
--- a/grep-matcher/src/lib.rs
+++ b/grep-matcher/src/lib.rs
@@ -38,13 +38,15 @@ implementations.
#![deny(missing_docs)]
-extern crate memchr;
+extern crate bstr;
use std::fmt;
use std::io;
use std::ops;
use std::u64;
+use bstr::BStr;
+
use interpolate::interpolate;
mod interpolate;
@@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
}
}
+impl ops::Index<Match> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, index: Match) -> &BStr {
+ &self[index.start..index.end]
+ }
+}
+
+impl ops::IndexMut<Match> for BStr {
+ #[inline]
+ fn index_mut(&mut self, index: Match) -> &mut BStr {
+ &mut self[index.start..index.end]
+ }
+}
+
impl ops::Index<Match> for str {
type Output = str;
diff --git a/grep-searcher/Cargo.toml b/grep-searcher/Cargo.toml
index 4cf5fde3..1e5c4243 100644
--- a/grep-searcher/Cargo.toml
+++ b/grep-searcher/Cargo.toml
@@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
encoding_rs_io = "0.1.3"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
log = "0.4.5"
-memchr = "2.1"
memmap = "0.7"
+[dependencies.bstr]
+version = "*"
+path = "/home/andrew/rust/bstr"
+default-features = false
+features = ["std"]
+
[dev-dependencies]
grep-regex = { version = "0.1.1", path = "../grep-regex" }
regex = "1.1"
diff --git a/grep-searcher/src/lib.rs b/grep-searcher/src/lib.rs
index f3ec02f2..6a9f4ba7 100644
--- a/grep-searcher/src/lib.rs
+++ b/grep-searcher/src/lib.rs
@@ -99,13 +99,13 @@ searches stdin.
#![deny(missing_docs)]
+extern crate bstr;
extern crate bytecount;
extern crate encoding_rs;
extern crate encoding_rs_io;
extern crate grep_matcher;
#[macro_use]
extern crate log;
-extern crate memchr;
extern crate memmap;
#[cfg(test)]
extern crate regex;
diff --git a/grep-searcher/src/line_buffer.rs b/grep-searcher/src/line_buffer.rs
index 0f5a2a7a..5a969743 100644
--- a/grep-searcher/src/line_buffer.rs
+++ b/grep-searcher/src/line_buffer.rs
@@ -1,8 +1,7 @@
use std::cmp;
use std::io;
-use std::ptr;
-use memchr::{memchr, memrchr};
+use bstr::{BStr, BString};
/// The default buffer capacity that we use for the line buffer.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
pub fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config,
- buf: vec![0; self.config.capacity],
+ buf: BString::from(vec![0; self.config.capacity]),
pos: 0,
last_lineterm: 0,
end: 0,
@@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
}
/// Return the contents of this buffer.
- pub fn buffer(&self) -> &[u8] {
+ pub fn buffer(&self) -> &BStr {
self.line_buffer.buffer()
}
@@ -284,7 +283,7 @@ pub struct LineBuffer {
/// The configuration of this buffer.
config: Config,
/// The primary buffer with which to hold data.
- buf: Vec<u8>,
+ buf: BString,
/// The current position of this buffer. This is always a valid sliceable
/// index into `buf`, and its maximum value is the length of `buf`.
pos: usize,
@@ -339,13 +338,13 @@ impl LineBuffer {
}
/// Return the contents of this buffer.
- fn buffer(&self) -> &[u8] {
+ fn buffer(&self) -> &BStr {
&self.buf[self.pos..self.last_lineterm]
}
/// Return the contents of the free space beyond the end of the buffer as
/// a mutable slice.
- fn free_buffer(&mut self) -> &mut [u8] {
+ fn free_buffer(&mut self) -> &mut BStr {
&mut self.buf[self.end..]
}
@@ -396,7 +395,7 @@ impl LineBuffer {
assert_eq!(self.pos, 0);
loop {
self.ensure_capacity()?;
- let readlen = rdr.read(self.free_buffer())?;
+ let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
if readlen == 0 {
// We're only done reading for good once the caller has
// consumed everything.
@@ -416,7 +415,7 @@ impl LineBuffer {
match self.config.binary {
BinaryDetection::None => {} // nothing to do
BinaryDetection::Quit(byte) => {
- if let Some(i) = memchr(byte, newbytes) {
+ if let Some(i) = newbytes.find_byte(byte) {
self.end = oldend + i;
self.last_lineterm = self.end;
self.binary_byte_offset =
@@ -444,7 +443,7 @@ impl LineBuffer {
}
// Update our `last_lineterm` positions if we read one.
- if let Some(i) = memrchr(self.config.lineterm, newbytes) {
+ if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
self.last_lineterm = oldend + i + 1;
return Ok(true);
}
@@ -467,40 +466,8 @@ impl LineBuffer {
return;
}
- assert!(self.pos < self.end && self.end <= self.buf.len());
let roll_len = self.end - self.pos;
- unsafe {
- // SAFETY: A buffer contains Copy data, so there's no problem
- // moving it around. Safety also depends on our indices being
- // in bounds, which they should always be, and we enforce with
- // an assert above.
- //
- // It seems like it should be possible to do this in safe code that
- // results in the same codegen. I tried the obvious:
- //
- // for (src, dst) in (self.pos..self.end).zip(0..) {
- // self.buf[dst] = self.buf[src];
- // }
- //
- // But the above does not work, and in fact compiles down to a slow
- // byte-by-byte loop. I tried a few other minor variations, but
- // alas, better minds might prevail.
- //
- // Overall, this doesn't save us *too* much. It mostly matters when
- // the number of bytes we're copying is large, which can happen
- // if the searcher is asked to produce a lot of context. We could
- // decide this isn't worth it, but it does make an appreciable
- // impact at or around the context=30 range on my machine.
- //
- // We could also use a temporary buffer that compiles down to two
- // memcpys and is faster than the byte-at-a-time loop, but it
- // complicates our options for limiting memory allocation a bit.
- ptr::copy(
- self.buf[self.pos..].as_ptr(),
- self.buf.as_mut_ptr(),
- roll_len,
- );
- }
+ self.buf.copy_within(self.pos.., 0);
self.pos = 0;
self.last_lineterm = roll_len;
self.end = roll_len;
@@ -536,14 +503,15 @@ impl LineBuffer {
}
}
-/// Replaces `src` with `replacement` in bytes.
-fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
+/// Replaces `src` with `replacement` in bytes, and return the offset of the
+/// first replacement, if one exists.
+fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
if src == replacement {
return None;
}
let mut first_pos = None;
let mut pos = 0;
- while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
+ while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
if first_pos.is_none() {
first_pos = Some(i);
}
@@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)]
mod tests {
use std::str;
+ use bstr::BString;
use super::*;
const SHERLOCK: &'static str = "\
@@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
slice.to_string()
}
- fn btos(slice: &[u8]) -> &str {
- str::from_utf8(slice).unwrap()
- }
-
fn replace_str(
slice: &str,
src: u8,
replacement: u8,
) -> (String, Option<usize>) {
- let mut dst = slice.to_string().into_bytes();
+ let mut dst = BString::from(slice);
let result = replace_bytes(&mut dst, src, replacement);
- (String::from_utf8(dst).unwrap(), result)
+ (dst.into_string().unwrap(), result)
}
#[test]
@@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\n");
assert_eq!(rdr.absolute_byte_offset(), 0);
rdr.consume(5);
assert_eq!(rdr.absolute_byte_offset(), 5);
@@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
assert_eq!(rdr.absolute_byte_offset(), 11);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n");
+ assert_eq!(rdr.buffer(), "\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n\n");
+ assert_eq!(rdr.buffer(), "\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
- let mut got = vec![];
+ let mut got = BString::new();
while rdr.fill().unwrap() {
- got.extend(rdr.buffer());
+ got.push(rdr.buffer());
rdr.consume_all();
}
- assert_eq!(bytes, btos(&got));
+ assert_eq!(bytes, got);
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
assert_eq!(rdr.binary_byte_offset(), None);
}
@@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// This returns an error because while we have just enough room to
@@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
assert!(rdr.fill().is_err());
// We can mush on though!
- assert_eq!(btos(rdr.buffer()), "m");
+ assert_eq!(rdr.buffer(), "m");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "aggie");
+ assert_eq!(rdr.buffer(), "aggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// We have just enough space.
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().is_err());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
}
#[test]
@@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli");
+ assert_eq!(rdr.buffer(), "homer\nli");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(!rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
assert_eq!(rdr.absolute_byte_offset(), 0);
assert_eq!(rdr.binary_byte_offset(), Some(0));
}
@@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\
+ assert_eq!(rdr.buffer(), "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\
");
@@ -901,7 +866,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -920,7 +885,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -939,7 +904,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -958,7 +923,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
diff --git a/grep-searcher/src/lines.rs b/grep-searcher/src/lines.rs
index ed225a42..aac7a343 100644
--- a/grep-searcher/src/lines.rs
+++ b/grep-searcher/src/lines.rs
@@ -2,8 +2,8 @@
A collection of routines for performing operations on lines.
*/
+use bstr::{B, BStr};
use bytecount;
-use memchr::{memchr, memrchr};
use grep_matcher::{LineTerminator, Match};
/// An iterator over lines in a particular slice of bytes.
@@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
/// `'b` refers to the lifetime of the underlying bytes.
#[derive(Debug)]
pub struct LineIter<'b> {
- bytes: &'b [u8],
+ bytes: &'b BStr,
stepper: LineStep,
}
@@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
/// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
LineIter {
- bytes: bytes,
+ bytes: B(bytes),
stepper: LineStep::new(line_term, 0, bytes.len()),
}
}
@@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
type Item = &'b [u8];
fn next(&mut self) -> Option<&'b [u8]> {
- self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
+ self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
}
}
@@ -73,19 +73,19 @@ impl LineStep {
/// The range returned includes the line terminator. Ranges are always
/// non-empty.
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
- self.next_impl(bytes)
+ self.next_impl(B(bytes))
}
/// Like next, but returns a `Match` instead of a tuple.
#[inline(always)]
- pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
+ pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
}
#[inline(always)]
- fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
+ fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
bytes = &bytes[..self.end];
- match memchr(self.line_term, &bytes[self.pos..]) {
+ match bytes[self.pos..].find_byte(self.line_term) {
None => {
if self.pos < bytes.len() {
let m = (self.pos, bytes.len());
@@ -109,15 +109,15 @@ impl LineStep {
}
/// Count the number of occurrences of `line_term` in `bytes`.
-pub fn count(bytes: &[u8], line_term: u8) -> u64 {
- bytecount::count(bytes, line_term) as u64
+pub fn count(bytes: &BStr, line_term: u8) -> u64 {
+ bytecount::count(bytes.as_bytes(), line_term) as u64
}
/// Given a line that possibly ends with a terminator, return that line without
/// the terminator.
#[inline(always)]
-pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
- let line_term = line_term.as_bytes();
+pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
+ let line_term = BStr::new(line_term.as_bytes());
let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) {
return &bytes[..bytes.len() - line_term.len()];
@@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
/// Line terminators are considered part of the line they terminate.
#[inline(always)]
pub fn locate(
- bytes: &[u8],
+ bytes: &BStr,
line_term: u8,
range: Match,
) -> Match {
- let line_start = memrchr(line_term, &bytes[0..range.start()])
+ let line_start = bytes[..range.start()]
+ .rfind_byte(line_term)
.map_or(0, |i| i + 1);
let line_end =
if range.end() > line_start && bytes[range.end() - 1] == line_term {
range.end()
} else {
- memchr(line_term, &bytes[range.end()..])
- .map_or(bytes.len(), |i| range.end() + i + 1)
+ bytes[range.end()..]
+ .find_byte(line_term)
+ .map_or(bytes.len(), |i| range.end() + i + 1)
};
Match::new(line_start, line_end)
}
@@ -155,7 +157,7 @@ pub fn locate(
///
/// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line.
-pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
+pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count)
}
@@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
fn preceding_by_pos(
- bytes: &[u8],
+ bytes: &BStr,
mut pos: usize,
line_term: u8,
mut count: usize,
@@ -180,7 +182,7 @@ fn preceding_by_pos(
pos -= 1;
}
loop {
- match memrchr(line_term, &bytes[..pos]) {
+ match bytes[..pos].rfind_byte(line_term) {
None => {
return 0;
}
@@ -201,7 +203,10 @@ fn preceding_by_pos(
mod tests {
use std::ops::Range;
use std::str;
+
+ use bstr::B;
use grep_matcher::Match;
+
use super::*;
const SHERLOCK: &'static str = "\
@@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
fn lines(text: &str) -> Vec<&str> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
- while let Some(m) = it.next_match(text.as_bytes()) {
+ while let Some(m) = it.next_match(B(text)) {
results.push(&text[m]);
}
results
@@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
fn line_ranges(text: &str) -> Vec<Range<usize>> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
- while let Some(m) = it.next_match(text.as_bytes()) {
+ while let Some(m) = it.next_match(B(text)) {
results.push(m.start()..m.end());
}
results
}
fn prev(text: &str, pos: usize, count: usize) -> usize {
- preceding_by_pos(text.as_bytes(), pos, b'\n', count)
+ preceding_by_pos(B(text), pos, b'\n', count)
}
fn loc(text: &str, start: usize, end: usize) -> Match {
- locate(text.as_bytes(), b'\n', Match::new(start, end))
+ locate(B(text), b'\n', Match::new(start, end))
}
#[test]
fn line_count() {
- assert_eq!(0, count(b"", b'\n'));
- assert_eq!(1, count(b"\n", b'\n'));
- assert_eq!(2, count(b"\n\n", b'\n'));
- assert_eq!(2, count(b"a\nb\nc", b'\n'));
+ assert_eq!(0, count(B(""), b'\n'));
+ assert_eq!(1, count(B("\n"), b'\n'));
+ assert_eq!(2, count(B("\n\n"), b'\n'));
+ assert_eq!(2, count(B("a\nb\nc"), b'\n'));
}
#[test]
@@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
#[test]
fn preceding_lines_doc() {
// These are the examples mentions in the documentation of `preceding`.
- let bytes = b"abc\nxyz\n";
+ let bytes = B("abc\nxyz\n");
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
diff --git a/grep-searcher/src/searcher/core.rs b/grep-searcher/src/searcher/core.rs
index 21dbae37..77f8369b 100644
--- a/grep-searcher/src/searcher/core.rs
+++ b/grep-searcher/src/searcher/core.rs
@@ -1,6 +1,6 @@
use std::cmp;
-use memchr::memchr;
+use bstr::BStr;
use grep_matcher::{LineMatchKind, Matcher};
use lines::{self, LineStep};
@@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn matched(
&mut self,
- buf: &[u8],
+ buf: &BStr,
range: &Range,
) -> Result<bool, S::Error> {
self.sink_matched(buf, range)
@@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
})
}
- pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
self.match_by_line_fast(buf)
} else {
@@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
- pub fn roll(&mut self, buf: &[u8]) -> usize {
+ pub fn roll(&mut self, buf: &BStr) -> usize {
let consumed =
if self.config.max_context() == 0 {
buf.len()
@@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
- pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
+ pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
if self.binary_byte_offset.is_some() {
return true;
}
@@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
BinaryDetection::Quit(b) => b,
_ => return false,
};
- if let Some(i) = memchr(binary_byte, &buf[*range]) {
+ if let Some(i) = buf[*range].find_byte(binary_byte) {
self.binary_byte_offset = Some(range.start() + i);
true
} else {
@@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn before_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
if self.config.before_context == 0 {
@@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn after_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
if self.after_context_left == 0 {
@@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn other_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
let range = Range::new(self.last_line_visited, upto);
@@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
let range = Range::new(self.pos(), buf.len());
@@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],
self.config.line_term,
);
- match self.matcher.shortest_match(slice) {
+ match self.matcher.shortest_match(slice.as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(result) => result.is_some(),
}
@@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() {
@@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)]
fn match_by_line_fast_invert(
&mut self,
- buf: &[u8],
+ buf: &BStr,
) -> Result<bool, S::Error> {
assert!(self.config.invert_match);
@@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)]
fn find_by_line_fast(
&self,
- buf: &[u8],
+ buf: &BStr,
) -> Result<Option<Range>, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
debug_assert!(self.is_line_by_line_fast());
let mut pos = self.pos();
while !buf[pos..].is_empty() {
- match self.matcher.find_candidate_line(&buf[pos..]) {
+ match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
Ok(Some(LineMatchKind::Confirmed(i))) => {
@@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],