summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock11
-rw-r--r--grep-matcher/Cargo.toml7
-rw-r--r--grep-matcher/src/interpolate.rs4
-rw-r--r--grep-matcher/src/lib.rs20
-rw-r--r--grep-searcher/Cargo.toml7
-rw-r--r--grep-searcher/src/lib.rs2
-rw-r--r--grep-searcher/src/line_buffer.rs121
-rw-r--r--grep-searcher/src/lines.rs61
-rw-r--r--grep-searcher/src/searcher/core.rs50
-rw-r--r--grep-searcher/src/searcher/glue.rs13
-rw-r--r--grep-searcher/src/searcher/mod.rs25
-rw-r--r--grep-searcher/src/testutil.rs6
12 files changed, 169 insertions, 158 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8f8965d3..20447b8b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -35,6 +35,13 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "bstr"
+version = "0.0.1"
+dependencies = [
+ "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
name = "bytecount"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -180,7 +187,7 @@ dependencies = [
name = "grep-matcher"
version = "0.1.1"
dependencies = [
- "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bstr 0.0.1",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -222,13 +229,13 @@ dependencies = [
name = "grep-searcher"
version = "0.1.1"
dependencies = [
+ "bstr 0.0.1",
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1",
"grep-regex 0.1.1",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
- "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
diff --git a/grep-matcher/Cargo.toml b/grep-matcher/Cargo.toml
index 39391291..d65ee505 100644
--- a/grep-matcher/Cargo.toml
+++ b/grep-matcher/Cargo.toml
@@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT"
autotests = false
-[dependencies]
-memchr = "2.1"
+[dependencies.bstr]
+version = "*"
+path = "/home/andrew/rust/bstr"
+default-features = false
+features = ["std"]
[dev-dependencies]
regex = "1.1"
diff --git a/grep-matcher/src/interpolate.rs b/grep-matcher/src/interpolate.rs
index 168dd343..126ce521 100644
--- a/grep-matcher/src/interpolate.rs
+++ b/grep-matcher/src/interpolate.rs
@@ -1,6 +1,6 @@
use std::str;
-use memchr::memchr;
+use bstr::B;
/// Interpolate capture references in `replacement` and write the interpolation
/// result to `dst`. References in `replacement` take the form of $N or $name,
@@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
N: FnMut(&str) -> Option<usize>
{
while !replacement.is_empty() {
- match memchr(b'$', replacement) {
+ match B(replacement).find_byte(b'$') {
None => break,
Some(i) => {
dst.extend(&replacement[..i]);
diff --git a/grep-matcher/src/lib.rs b/grep-matcher/src/lib.rs
index 9a067efa..ba59b923 100644
--- a/grep-matcher/src/lib.rs
+++ b/grep-matcher/src/lib.rs
@@ -38,13 +38,15 @@ implementations.
#![deny(missing_docs)]
-extern crate memchr;
+extern crate bstr;
use std::fmt;
use std::io;
use std::ops;
use std::u64;
+use bstr::BStr;
+
use interpolate::interpolate;
mod interpolate;
@@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
}
}
+impl ops::Index<Match> for BStr {
+ type Output = BStr;
+
+ #[inline]
+ fn index(&self, index: Match) -> &BStr {
+ &self[index.start..index.end]
+ }
+}
+
+impl ops::IndexMut<Match> for BStr {
+ #[inline]
+ fn index_mut(&mut self, index: Match) -> &mut BStr {
+ &mut self[index.start..index.end]
+ }
+}
+
impl ops::Index<Match> for str {
type Output = str;
diff --git a/grep-searcher/Cargo.toml b/grep-searcher/Cargo.toml
index 4cf5fde3..1e5c4243 100644
--- a/grep-searcher/Cargo.toml
+++ b/grep-searcher/Cargo.toml
@@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
encoding_rs_io = "0.1.3"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
log = "0.4.5"
-memchr = "2.1"
memmap = "0.7"
+[dependencies.bstr]
+version = "*"
+path = "/home/andrew/rust/bstr"
+default-features = false
+features = ["std"]
+
[dev-dependencies]
grep-regex = { version = "0.1.1", path = "../grep-regex" }
regex = "1.1"
diff --git a/grep-searcher/src/lib.rs b/grep-searcher/src/lib.rs
index f3ec02f2..6a9f4ba7 100644
--- a/grep-searcher/src/lib.rs
+++ b/grep-searcher/src/lib.rs
@@ -99,13 +99,13 @@ searches stdin.
#![deny(missing_docs)]
+extern crate bstr;
extern crate bytecount;
extern crate encoding_rs;
extern crate encoding_rs_io;
extern crate grep_matcher;
#[macro_use]
extern crate log;
-extern crate memchr;
extern crate memmap;
#[cfg(test)]
extern crate regex;
diff --git a/grep-searcher/src/line_buffer.rs b/grep-searcher/src/line_buffer.rs
index 0f5a2a7a..5a969743 100644
--- a/grep-searcher/src/line_buffer.rs
+++ b/grep-searcher/src/line_buffer.rs
@@ -1,8 +1,7 @@
use std::cmp;
use std::io;
-use std::ptr;
-use memchr::{memchr, memrchr};
+use bstr::{BStr, BString};
/// The default buffer capacity that we use for the line buffer.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
pub fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config,
- buf: vec![0; self.config.capacity],
+ buf: BString::from(vec![0; self.config.capacity]),
pos: 0,
last_lineterm: 0,
end: 0,
@@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
}
/// Return the contents of this buffer.
- pub fn buffer(&self) -> &[u8] {
+ pub fn buffer(&self) -> &BStr {
self.line_buffer.buffer()
}
@@ -284,7 +283,7 @@ pub struct LineBuffer {
/// The configuration of this buffer.
config: Config,
/// The primary buffer with which to hold data.
- buf: Vec<u8>,
+ buf: BString,
/// The current position of this buffer. This is always a valid sliceable
/// index into `buf`, and its maximum value is the length of `buf`.
pos: usize,
@@ -339,13 +338,13 @@ impl LineBuffer {
}
/// Return the contents of this buffer.
- fn buffer(&self) -> &[u8] {
+ fn buffer(&self) -> &BStr {
&self.buf[self.pos..self.last_lineterm]
}
/// Return the contents of the free space beyond the end of the buffer as
/// a mutable slice.
- fn free_buffer(&mut self) -> &mut [u8] {
+ fn free_buffer(&mut self) -> &mut BStr {
&mut self.buf[self.end..]
}
@@ -396,7 +395,7 @@ impl LineBuffer {
assert_eq!(self.pos, 0);
loop {
self.ensure_capacity()?;
- let readlen = rdr.read(self.free_buffer())?;
+ let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
if readlen == 0 {
// We're only done reading for good once the caller has
// consumed everything.
@@ -416,7 +415,7 @@ impl LineBuffer {
match self.config.binary {
BinaryDetection::None => {} // nothing to do
BinaryDetection::Quit(byte) => {
- if let Some(i) = memchr(byte, newbytes) {
+ if let Some(i) = newbytes.find_byte(byte) {
self.end = oldend + i;
self.last_lineterm = self.end;
self.binary_byte_offset =
@@ -444,7 +443,7 @@ impl LineBuffer {
}
// Update our `last_lineterm` positions if we read one.
- if let Some(i) = memrchr(self.config.lineterm, newbytes) {
+ if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
self.last_lineterm = oldend + i + 1;
return Ok(true);
}
@@ -467,40 +466,8 @@ impl LineBuffer {
return;
}
- assert!(self.pos < self.end && self.end <= self.buf.len());
let roll_len = self.end - self.pos;
- unsafe {
- // SAFETY: A buffer contains Copy data, so there's no problem
- // moving it around. Safety also depends on our indices being
- // in bounds, which they should always be, and we enforce with
- // an assert above.
- //
- // It seems like it should be possible to do this in safe code that
- // results in the same codegen. I tried the obvious:
- //
- // for (src, dst) in (self.pos..self.end).zip(0..) {
- // self.buf[dst] = self.buf[src];
- // }
- //
- // But the above does not work, and in fact compiles down to a slow
- // byte-by-byte loop. I tried a few other minor variations, but
- // alas, better minds might prevail.
- //
- // Overall, this doesn't save us *too* much. It mostly matters when
- // the number of bytes we're copying is large, which can happen
- // if the searcher is asked to produce a lot of context. We could
- // decide this isn't worth it, but it does make an appreciable
- // impact at or around the context=30 range on my machine.
- //
- // We could also use a temporary buffer that compiles down to two
- // memcpys and is faster than the byte-at-a-time loop, but it
- // complicates our options for limiting memory allocation a bit.
- ptr::copy(
- self.buf[self.pos..].as_ptr(),
- self.buf.as_mut_ptr(),
- roll_len,
- );
- }
+ self.buf.copy_within(self.pos.., 0);
self.pos = 0;
self.last_lineterm = roll_len;
self.end = roll_len;
@@ -536,14 +503,15 @@ impl LineBuffer {
}
}
-/// Replaces `src` with `replacement` in bytes.
-fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
+/// Replaces `src` with `replacement` in bytes, and returns the offset of the
+/// first replacement, if one exists.
+fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
if src == replacement {
return None;
}
let mut first_pos = None;
let mut pos = 0;
- while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
+ while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
if first_pos.is_none() {
first_pos = Some(i);
}
@@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)]
mod tests {
use std::str;
+ use bstr::BString;
use super::*;
const SHERLOCK: &'static str = "\
@@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
slice.to_string()
}
- fn btos(slice: &[u8]) -> &str {
- str::from_utf8(slice).unwrap()
- }
-
fn replace_str(
slice: &str,
src: u8,
replacement: u8,
) -> (String, Option<usize>) {
- let mut dst = slice.to_string().into_bytes();
+ let mut dst = BString::from(slice);
let result = replace_bytes(&mut dst, src, replacement);
- (String::from_utf8(dst).unwrap(), result)
+ (dst.into_string().unwrap(), result)
}
#[test]
@@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\n");
assert_eq!(rdr.absolute_byte_offset(), 0);
rdr.consume(5);
assert_eq!(rdr.absolute_byte_offset(), 5);
@@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
assert_eq!(rdr.absolute_byte_offset(), 11);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n");
+ assert_eq!(rdr.buffer(), "\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n\n");
+ assert_eq!(rdr.buffer(), "\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
- let mut got = vec![];
+ let mut got = BString::new();
while rdr.fill().unwrap() {
- got.extend(rdr.buffer());
+ got.push(rdr.buffer());
rdr.consume_all();
}
- assert_eq!(bytes, btos(&got));
+ assert_eq!(bytes, got);
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
assert_eq!(rdr.binary_byte_offset(), None);
}
@@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// This returns an error because while we have just enough room to
@@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
assert!(rdr.fill().is_err());
// We can mush on though!
- assert_eq!(btos(rdr.buffer()), "m");
+ assert_eq!(rdr.buffer(), "m");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "aggie");
+ assert_eq!(rdr.buffer(), "aggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// We have just enough space.
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().is_err());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
}
#[test]
@@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli");
+ assert_eq!(rdr.buffer(), "homer\nli");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(!rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
assert_eq!(rdr.absolute_byte_offset(), 0);
assert_eq!(rdr.binary_byte_offset(), Some(0));
}
@@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\
+ assert_eq!(rdr.buffer(), "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\
");
@@ -901,7 +866,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -920,7 +885,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -939,7 +904,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -958,7 +923,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
diff --git a/grep-searcher/src/lines.rs b/grep-searcher/src/lines.rs
index ed225a42..aac7a343 100644
--- a/grep-searcher/src/lines.rs
+++ b/grep-searcher/src/lines.rs
@@ -2,8 +2,8 @@
A collection of routines for performing operations on lines.
*/
+use bstr::{B, BStr};
use bytecount;
-use memchr::{memchr, memrchr};
use grep_matcher::{LineTerminator, Match};
/// An iterator over lines in a particular slice of bytes.
@@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
/// `'b` refers to the lifetime of the underlying bytes.
#[derive(Debug)]
pub struct LineIter<'b> {
- bytes: &'b [u8],
+ bytes: &'b BStr,
stepper: LineStep,
}
@@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
/// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
LineIter {
- bytes: bytes,
+ bytes: B(bytes),
stepper: LineStep::new(line_term, 0, bytes.len()),
}
}
@@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
type Item = &'b [u8];
fn next(&mut self) -> Option<&'b [u8]> {
- self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
+ self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
}
}
@@ -73,19 +73,19 @@ impl LineStep {
/// The range returned includes the line terminator. Ranges are always
/// non-empty.
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
- self.next_impl(bytes)
+ self.next_impl(B(bytes))
}
/// Like next, but returns a `Match` instead of a tuple.
#[inline(always)]
- pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
+ pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
}
#[inline(always)]
- fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
+ fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
bytes = &bytes[..self.end];
- match memchr(self.line_term, &bytes[self.pos..]) {
+ match bytes[self.pos..].find_byte(self.line_term) {
None => {
if self.pos < bytes.len() {
let m = (self.pos, bytes.len());
@@ -109,15 +109,15 @@ impl LineStep {
}
/// Count the number of occurrences of `line_term` in `bytes`.
-pub fn count(bytes: &[u8], line_term: u8) -> u64 {
- bytecount::count(bytes, line_term) as u64
+pub fn count(bytes: &BStr, line_term: u8) -> u64 {
+ bytecount::count(bytes.as_bytes(), line_term) as u64
}
/// Given a line that possibly ends with a terminator, return that line without
/// the terminator.
#[inline(always)]
-pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
- let line_term = line_term.as_bytes();
+pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
+ let line_term = BStr::new(line_term.as_bytes());
let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) {
return &bytes[..bytes.len() - line_term.len()];
@@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
/// Line terminators are considered part of the line they terminate.
#[inline(always)]
pub fn locate(
- bytes: &[u8],
+ bytes: &BStr,
line_term: u8,
range: Match,
) -> Match {
- let line_start = memrchr(line_term, &bytes[0..range.start()])
+ let line_start = bytes[..range.start()]
+ .rfind_byte(line_term)
.map_or(0, |i| i + 1);
let line_end =
if range.end() > line_start && bytes[range.end() - 1] == line_term {
range.end()
} else {
- memchr(line_term, &bytes[range.end()..])
- .map_or(bytes.len(), |i| range.end() + i + 1)
+ bytes[range.end()..]
+ .find_byte(line_term)
+ .map_or(bytes.len(), |i| range.end() + i + 1)
};
Match::new(line_start, line_end)
}
@@ -155,7 +157,7 @@ pub fn locate(
///
/// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line.
-pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
+pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count)
}
@@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
fn preceding_by_pos(
- bytes: &[u8],
+ bytes: &BStr,
mut pos: usize,
line_term: u8,
mut count: usize,
@@ -180,7 +182,7 @@ fn preceding_by_pos(
pos -= 1;
}
loop {
- match memrchr(line_term, &bytes[..pos]) {
+ match bytes[..pos].rfind_byte(line_term) {
None => {
return 0;
}
@@ -201,7 +203,10 @@ fn preceding_by_pos(
mod tests {
use std::ops::Range;
use std::str;
+
+ use bstr::B;
use grep_matcher::Match;
+
use super::*;
const SHERLOCK: &'static str = "\
@@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
fn lines(text: &str) -> Vec<&str> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
- while let Some(m) = it.next_match(text.as_bytes()) {
+ while let Some(m) = it.next_match(B(text)) {
results.push(&text[m]);
}
results
@@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
fn line_ranges(text: &str) -> Vec<Range<usize>> {
let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len());
- while let Some(m) = it.next_match(text.as_bytes()) {
+ while let Some(m) = it.next_match(B(text)) {
results.push(m.start()..m.end());
}
results
}
fn prev(text: &str, pos: usize, count: usize) -> usize {
- preceding_by_pos(text.as_bytes(), pos, b'\n', count)
+ preceding_by_pos(B(text), pos, b'\n', count)
}
fn loc(text: &str, start: usize, end: usize) -> Match {
- locate(text.as_bytes(), b'\n', Match::new(start, end))
+ locate(B(text), b'\n', Match::new(start, end))
}
#[test]
fn line_count() {
- assert_eq!(0, count(b"", b'\n'));
- assert_eq!(1, count(b"\n", b'\n'));
- assert_eq!(2, count(b"\n\n", b'\n'));
- assert_eq!(2, count(b"a\nb\nc", b'\n'));
+ assert_eq!(0, count(B(""), b'\n'));
+ assert_eq!(1, count(B("\n"), b'\n'));
+ assert_eq!(2, count(B("\n\n"), b'\n'));
+ assert_eq!(2, count(B("a\nb\nc"), b'\n'));
}
#[test]
@@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
#[test]
fn preceding_lines_doc() {
// These are the examples mentioned in the documentation of `preceding`.
- let bytes = b"abc\nxyz\n";
+ let bytes = B("abc\nxyz\n");
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
diff --git a/grep-searcher/src/searcher/core.rs b/grep-searcher/src/searcher/core.rs
index 21dbae37..77f8369b 100644
--- a/grep-searcher/src/searcher/core.rs
+++ b/grep-searcher/src/searcher/core.rs
@@ -1,6 +1,6 @@
use std::cmp;
-use memchr::memchr;
+use bstr::BStr;
use grep_matcher::{LineMatchKind, Matcher};
use lines::{self, LineStep};
@@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn matched(
&mut self,
- buf: &[u8],
+ buf: &BStr,
range: &Range,
) -> Result<bool, S::Error> {
self.sink_matched(buf, range)
@@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
})
}
- pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
self.match_by_line_fast(buf)
} else {
@@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
}
- pub fn roll(&mut self, buf: &[u8]) -> usize {
+ pub fn roll(&mut self, buf: &BStr) -> usize {
let consumed =
if self.config.max_context() == 0 {
buf.len()
@@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed
}
- pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
+ pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
if self.binary_byte_offset.is_some() {
return true;
}
@@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
BinaryDetection::Quit(b) => b,
_ => return false,
};
- if let Some(i) = memchr(binary_byte, &buf[*range]) {
+ if let Some(i) = buf[*range].find_byte(binary_byte) {
self.binary_byte_offset = Some(range.start() + i);
true
} else {
@@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn before_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
if self.config.before_context == 0 {
@@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn after_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
if self.after_context_left == 0 {
@@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn other_context_by_line(
&mut self,
- buf: &[u8],
+ buf: &BStr,
upto: usize,
) -> Result<bool, S::Error> {
let range = Range::new(self.last_line_visited, upto);
@@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
let range = Range::new(self.pos(), buf.len());
@@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],
self.config.line_term,
);
- match self.matcher.shortest_match(slice) {
+ match self.matcher.shortest_match(slice.as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(result) => result.is_some(),
}
@@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true)
}
- fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+ fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() {
@@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)]
fn match_by_line_fast_invert(
&mut self,
- buf: &[u8],
+ buf: &BStr,
) -> Result<bool, S::Error> {
assert!(self.config.invert_match);
@@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)]
fn find_by_line_fast(
&self,
- buf: &[u8],
+ buf: &BStr,
) -> Result<Option<Range>, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
debug_assert!(self.is_line_by_line_fast());
let mut pos = self.pos();
while !buf[pos..].is_empty() {
- match self.matcher.find_candidate_line(&buf[pos..]) {
+ match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None),
Ok(Some(LineMatchKind::Confirmed(i))) => {
@@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line],
self.config.line_term,
);
- match self.matcher.is_match(slice) {
+ match self.matcher.is_match(slice.as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)),
Ok(true) => return Ok(Some(line)),
Ok(false) => {
@@ -413,7 +413,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)]
fn sink_matched(
&mut self,
- buf: &[u8],
+ buf: &BStr,
range: &Range,
) -> Result<bool, S::Error> {
if self.binary && self.detect_binary(buf, range) {
@@ -438,7 +438,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&self.searcher,
&SinkMatch {
line_term: self.config.line_term,
- bytes: linebuf,
+ bytes: linebuf.as_bytes(),
absolute_byte_offset: offset,
line_number: self.line_number,
},
@@ -454,7 +454,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
fn sink_before_context(
&mut self,
- buf: &[u8],
+ buf: &BStr,
range: &Range,
) -> Result<bool, S::Error> {
if self.binary && self.detect_binary(buf, range) {
@@ -466,7 +466,7 @@ impl<'s, M: Matche