summaryrefslogtreecommitdiffstats
path: root/grep-searcher/src/line_buffer.rs
diff options
context:
space:
mode:
authorAndrew Gallant <jamslam@gmail.com>2019-01-20 12:32:09 -0500
committerAndrew Gallant <jamslam@gmail.com>2019-01-20 12:32:09 -0500
commit4b88e08f418d4c24223879a727efa5ad0c3c9058 (patch)
tree213f92d921b28d3d1841742085810005b9cf7bfd /grep-searcher/src/line_buffer.rs
parent7cbc535d70a53c81dfa3e58552c01f21c2e38d28 (diff)
search: migrate to bstr (ag/bstr-migration)
This is an initial attempt at migrating grep-searcher to use the new bstr crate (not yet published). This is mostly an improvement, although a significant problem is that the grep-matcher crate controls the `Index` impls for the `Match` type, which we use quite heavily. Thus, in order to impl `Index` for `BStr`, we need to add bstr as a public dependency to grep-matcher. This is really bad news because grep-matcher is supposed to be a light-weight core crate that defines a matcher interface, which is itself intended to be a public dependency. Thus, a semver bump on bstr will have very undesirable ripple effects throughout ripgrep's library crates. This would be something we could stomach if bstr was solid at 1.0 and committed to avoiding breaking changes. But it's not there yet.
Diffstat (limited to 'grep-searcher/src/line_buffer.rs')
-rw-r--r--grep-searcher/src/line_buffer.rs121
1 files changed, 43 insertions, 78 deletions
diff --git a/grep-searcher/src/line_buffer.rs b/grep-searcher/src/line_buffer.rs
index 0f5a2a7a..5a969743 100644
--- a/grep-searcher/src/line_buffer.rs
+++ b/grep-searcher/src/line_buffer.rs
@@ -1,8 +1,7 @@
use std::cmp;
use std::io;
-use std::ptr;
-use memchr::{memchr, memrchr};
+use bstr::{BStr, BString};
/// The default buffer capacity that we use for the line buffer.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
pub fn build(&self) -> LineBuffer {
LineBuffer {
config: self.config,
- buf: vec![0; self.config.capacity],
+ buf: BString::from(vec![0; self.config.capacity]),
pos: 0,
last_lineterm: 0,
end: 0,
@@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
}
/// Return the contents of this buffer.
- pub fn buffer(&self) -> &[u8] {
+ pub fn buffer(&self) -> &BStr {
self.line_buffer.buffer()
}
@@ -284,7 +283,7 @@ pub struct LineBuffer {
/// The configuration of this buffer.
config: Config,
/// The primary buffer with which to hold data.
- buf: Vec<u8>,
+ buf: BString,
/// The current position of this buffer. This is always a valid sliceable
/// index into `buf`, and its maximum value is the length of `buf`.
pos: usize,
@@ -339,13 +338,13 @@ impl LineBuffer {
}
/// Return the contents of this buffer.
- fn buffer(&self) -> &[u8] {
+ fn buffer(&self) -> &BStr {
&self.buf[self.pos..self.last_lineterm]
}
/// Return the contents of the free space beyond the end of the buffer as
/// a mutable slice.
- fn free_buffer(&mut self) -> &mut [u8] {
+ fn free_buffer(&mut self) -> &mut BStr {
&mut self.buf[self.end..]
}
@@ -396,7 +395,7 @@ impl LineBuffer {
assert_eq!(self.pos, 0);
loop {
self.ensure_capacity()?;
- let readlen = rdr.read(self.free_buffer())?;
+ let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
if readlen == 0 {
// We're only done reading for good once the caller has
// consumed everything.
@@ -416,7 +415,7 @@ impl LineBuffer {
match self.config.binary {
BinaryDetection::None => {} // nothing to do
BinaryDetection::Quit(byte) => {
- if let Some(i) = memchr(byte, newbytes) {
+ if let Some(i) = newbytes.find_byte(byte) {
self.end = oldend + i;
self.last_lineterm = self.end;
self.binary_byte_offset =
@@ -444,7 +443,7 @@ impl LineBuffer {
}
// Update our `last_lineterm` positions if we read one.
- if let Some(i) = memrchr(self.config.lineterm, newbytes) {
+ if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
self.last_lineterm = oldend + i + 1;
return Ok(true);
}
@@ -467,40 +466,8 @@ impl LineBuffer {
return;
}
- assert!(self.pos < self.end && self.end <= self.buf.len());
let roll_len = self.end - self.pos;
- unsafe {
- // SAFETY: A buffer contains Copy data, so there's no problem
- // moving it around. Safety also depends on our indices being
- // in bounds, which they should always be, and we enforce with
- // an assert above.
- //
- // It seems like it should be possible to do this in safe code that
- // results in the same codegen. I tried the obvious:
- //
- // for (src, dst) in (self.pos..self.end).zip(0..) {
- // self.buf[dst] = self.buf[src];
- // }
- //
- // But the above does not work, and in fact compiles down to a slow
- // byte-by-byte loop. I tried a few other minor variations, but
- // alas, better minds might prevail.
- //
- // Overall, this doesn't save us *too* much. It mostly matters when
- // the number of bytes we're copying is large, which can happen
- // if the searcher is asked to produce a lot of context. We could
- // decide this isn't worth it, but it does make an appreciable
- // impact at or around the context=30 range on my machine.
- //
- // We could also use a temporary buffer that compiles down to two
- // memcpys and is faster than the byte-at-a-time loop, but it
- // complicates our options for limiting memory allocation a bit.
- ptr::copy(
- self.buf[self.pos..].as_ptr(),
- self.buf.as_mut_ptr(),
- roll_len,
- );
- }
+ self.buf.copy_within(self.pos.., 0);
self.pos = 0;
self.last_lineterm = roll_len;
self.end = roll_len;
@@ -536,14 +503,15 @@ impl LineBuffer {
}
}
-/// Replaces `src` with `replacement` in bytes.
-fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
+/// Replaces `src` with `replacement` in bytes, and returns the offset of the
+/// first replacement, if one exists.
+fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
if src == replacement {
return None;
}
let mut first_pos = None;
let mut pos = 0;
- while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
+ while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
if first_pos.is_none() {
first_pos = Some(i);
}
@@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)]
mod tests {
use std::str;
+ use bstr::BString;
use super::*;
const SHERLOCK: &'static str = "\
@@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
slice.to_string()
}
- fn btos(slice: &[u8]) -> &str {
- str::from_utf8(slice).unwrap()
- }
-
fn replace_str(
slice: &str,
src: u8,
replacement: u8,
) -> (String, Option<usize>) {
- let mut dst = slice.to_string().into_bytes();
+ let mut dst = BString::from(slice);
let result = replace_bytes(&mut dst, src, replacement);
- (String::from_utf8(dst).unwrap(), result)
+ (dst.into_string().unwrap(), result)
}
#[test]
@@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\n");
assert_eq!(rdr.absolute_byte_offset(), 0);
rdr.consume(5);
assert_eq!(rdr.absolute_byte_offset(), 5);
@@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
assert_eq!(rdr.absolute_byte_offset(), 11);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n");
+ assert_eq!(rdr.buffer(), "\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\n\n");
+ assert_eq!(rdr.buffer(), "\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
- let mut got = vec![];
+ let mut got = BString::new();
while rdr.fill().unwrap() {
- got.extend(rdr.buffer());
+ got.push(rdr.buffer());
rdr.consume_all();
}
- assert_eq!(bytes, btos(&got));
+ assert_eq!(bytes, got);
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
assert_eq!(rdr.binary_byte_offset(), None);
}
@@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// This returns an error because while we have just enough room to
@@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
assert!(rdr.fill().is_err());
// We can mush on though!
- assert_eq!(btos(rdr.buffer()), "m");
+ assert_eq!(rdr.buffer(), "m");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "aggie");
+ assert_eq!(rdr.buffer(), "aggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\n");
+ assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all();
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "lisa\n");
+ assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all();
// We have just enough space.
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "maggie");
+ assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().is_err());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
}
#[test]
@@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli");
+ assert_eq!(rdr.buffer(), "homer\nli");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(!rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "");
+ assert_eq!(rdr.buffer(), "");
assert_eq!(rdr.absolute_byte_offset(), 0);
assert_eq!(rdr.binary_byte_offset(), Some(0));
}
@@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\
+ assert_eq!(rdr.buffer(), "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\
");
@@ -901,7 +866,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -920,7 +885,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
+ assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -939,7 +904,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());
@@ -958,7 +923,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap());
- assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+ assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all();
assert!(!rdr.fill().unwrap());