summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author Andre Bogus <bogusandre@gmail.com> 2016-09-23 04:59:25 +0200
committer Andrew Gallant <jamslam@gmail.com> 2016-11-05 22:29:26 -0400
commit 02de97b8ce2762a7530cc18bba737f6ccea022a2 (patch)
tree 06d934ad7866e02b85ac151b25597fdb772cf949 /src
parent 32db773d5148f2fedc57ab2b2b98d410f91a6f25 (diff)
Use the bytecount crate for fast line counting.
Fixes #128
Diffstat (limited to 'src')
-rw-r--r-- src/search_stream.rs 84
1 file changed, 3 insertions, 81 deletions
diff --git a/src/search_stream.rs b/src/search_stream.rs
index 8f458ca5..cbd7a63e 100644
--- a/src/search_stream.rs
+++ b/src/search_stream.rs
@@ -4,6 +4,8 @@ printing matches. In particular, it searches the file in a streaming fashion
using `read` calls and a (roughly) fixed size buffer.
*/
+extern crate bytecount;
+
use std::cmp;
use std::error::Error as StdError;
use std::fmt;
@@ -583,88 +585,8 @@ pub fn is_binary(buf: &[u8]) -> bool {
/// Count the number of lines in the given buffer.
#[inline(never)]
-
-#[inline(never)]
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
- // This was adapted from code in the memchr crate. The specific benefit
- // here is that we can avoid a branch in the inner loop because all we're
- // doing is counting.
-
- // The technique to count EOL bytes was adapted from:
- // http://bits.stephan-brumme.com/null.html
- const LO_U64: u64 = 0x0101010101010101;
- const HI_U64: u64 = 0x8080808080808080;
-
- // use truncation
- const LO_USIZE: usize = LO_U64 as usize;
- const HI_USIZE: usize = HI_U64 as usize;
-
- #[cfg(target_pointer_width = "32")]
- const USIZE_BYTES: usize = 4;
- #[cfg(target_pointer_width = "64")]
- const USIZE_BYTES: usize = 8;
-
- fn count_eol(eol: usize) -> u64 {
- // Ideally, this would compile down to a POPCNT instruction, but
- // it looks like you need to set RUSTFLAGS="-C target-cpu=native"
- // (or target-feature=+popcnt) to get that to work. Bummer.
- (eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
- }
-
- #[cfg(target_pointer_width = "32")]
- fn repeat_byte(b: u8) -> usize {
- let mut rep = (b as usize) << 8 | b as usize;
- rep = rep << 16 | rep;
- rep
- }
-
- #[cfg(target_pointer_width = "64")]
- fn repeat_byte(b: u8) -> usize {
- let mut rep = (b as usize) << 8 | b as usize;
- rep = rep << 16 | rep;
- rep = rep << 32 | rep;
- rep
- }
-
- fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
- let mut count = 0;
- while let Some(pos) = memchr(eol, buf) {
- count += 1;
- buf = &buf[pos + 1..];
- }
- count
- }
-
- let len = buf.len();
- let ptr = buf.as_ptr();
- let mut count = 0;
-
- // Search up to an aligned boundary...
- let align = (ptr as usize) & (USIZE_BYTES - 1);
- let mut i = 0;
- if align > 0 {
- i = cmp::min(USIZE_BYTES - align, len);
- count += count_lines_slow(&buf[..i], eol);
- }
-
- // ... and search the rest.
- let repeated_eol = repeat_byte(eol);
-
- if len >= 2 * USIZE_BYTES {
- while i <= len - (2 * USIZE_BYTES) {
- unsafe {
- let u = *(ptr.offset(i as isize) as *const usize);
- let v = *(ptr.offset((i + USIZE_BYTES) as isize)
- as *const usize);
-
- count += count_eol(u ^ repeated_eol);
- count += count_eol(v ^ repeated_eol);
- }
- i += USIZE_BYTES * 2;
- }
- }
- count += count_lines_slow(&buf[i..], eol);
- count
+ bytecount::count(buf, eol) as u64
}
/// Replaces a with b in buf.