diff options
author | Andrew Gallant <jamslam@gmail.com> | 2016-09-10 01:35:44 -0400 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2016-09-10 01:35:44 -0400 |
commit | 19615245cd218c309e03c5a072ddfe0929c28a7f (patch) | |
tree | 7b3eda1cf55860b8a9eac5ee7f42a5d9f88adb66 | |
parent | 98a48b44bc4158a1e861cffce33609fa99912566 (diff) |
Make line counting much faster.
-rw-r--r-- | src/search_stream.rs | 85 |
1 files changed, 80 insertions, 5 deletions
diff --git a/src/search_stream.rs b/src/search_stream.rs index 08f70184..ec8453de 100644 --- a/src/search_stream.rs +++ b/src/search_stream.rs @@ -543,13 +543,88 @@ pub fn is_binary(buf: &[u8]) -> bool { } /// Count the number of lines in the given buffer. -#[inline(always)] -pub fn count_lines(mut buf: &[u8], eol: u8) -> u64 { +#[inline(never)] + +#[inline(never)] +pub fn count_lines(buf: &[u8], eol: u8) -> u64 { + // This was adapted from code in the memchr crate. The specific benefit + // here is that we can avoid a branch in the inner loop because all we're + // doing is counting. + + // The technique to count EOL bytes was adapted from: + // http://bits.stephan-brumme.com/null.html + const LO_U64: u64 = 0x0101010101010101; + const HI_U64: u64 = 0x8080808080808080; + + // use truncation + const LO_USIZE: usize = LO_U64 as usize; + const HI_USIZE: usize = HI_U64 as usize; + + #[cfg(target_pointer_width = "32")] + const USIZE_BYTES: usize = 4; + #[cfg(target_pointer_width = "64")] + const USIZE_BYTES: usize = 8; + + fn count_eol(eol: usize) -> u64 { + // Ideally, this would compile down to a POPCNT instruction, but + // it looks like you need to set RUSTFLAGS="-C target-cpu=native" + // (or target-feature=+popcnt) to get that to work. Bummer. + (eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64 + } + + #[cfg(target_pointer_width = "32")] + fn repeat_byte(b: u8) -> usize { + let mut rep = (b as usize) << 8 | b as usize; + rep = rep << 16 | rep; + rep + } + + #[cfg(target_pointer_width = "64")] + fn repeat_byte(b: u8) -> usize { + let mut rep = (b as usize) << 8 | b as usize; + rep = rep << 16 | rep; + rep = rep << 32 | rep; + rep + } + + fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 { + let mut count = 0; + while let Some(pos) = memchr(eol, buf) { + count += 1; + buf = &buf[pos + 1..]; + } + count + } + + let len = buf.len(); + let ptr = buf.as_ptr(); let mut count = 0; - while let Some(pos) = memchr(eol, buf) { - count += 1; - buf = &buf[pos + 1..]; + + // Search up to an aligned boundary... + let align = (ptr as usize) & (USIZE_BYTES - 1); + let mut i = 0; + if align > 0 { + i = cmp::min(USIZE_BYTES - align, len); + count += count_lines_slow(&buf[..i], eol); + } + + // ... and search the rest. + let repeated_eol = repeat_byte(eol); + + if len >= 2 * USIZE_BYTES { + while i <= len - (2 * USIZE_BYTES) { + unsafe { + let u = *(ptr.offset(i as isize) as *const usize); + let v = *(ptr.offset((i + USIZE_BYTES) as isize) + as *const usize); + + count += count_eol(u ^ repeated_eol); + count += count_eol(v ^ repeated_eol); + } + i += USIZE_BYTES * 2; + } } + count += count_lines_slow(&buf[i..], eol); count } |