From 6397b048cf6943f38aefb97c1c4199ad744ca978 Mon Sep 17 00:00:00 2001
From: Justus Winter
Date: Mon, 10 Sep 2018 14:30:57 +0200
Subject: buffered-reader: Add mmapping BufferedReaderFile variant.

 - Fixes #98.
---
 buffered-reader/src/file_unix.rs | 252 +++++++++++++++++++++++++++++++++++++++
 buffered-reader/src/lib.rs       |  11 ++
 2 files changed, 263 insertions(+)
 create mode 100644 buffered-reader/src/file_unix.rs

(limited to 'buffered-reader/src')

diff --git a/buffered-reader/src/file_unix.rs b/buffered-reader/src/file_unix.rs
new file mode 100644
index 00000000..77c27d77
--- /dev/null
+++ b/buffered-reader/src/file_unix.rs
@@ -0,0 +1,252 @@
+//! A mmapping `BufferedReader` implementation for files.
+//!
+//! On my (Justus) system, this implementation improves the
+//! performance of the statistics example by ~10% over the
+//! BufferedReaderGeneric.
+
+use libc::{c_void, size_t, mmap, munmap, PROT_READ, MAP_PRIVATE, MAP_FAILED};
+use std::fmt;
+use std::fs::File;
+use std::io;
+use std::os::unix::io::AsRawFd;
+use std::slice;
+use std::path::Path;
+use std::ptr;
+
+use super::*;
+
+// For small files, the overhead of manipulating the page table is not
+// worth the gain.  This threshold has been chosen so that on my
+// (Justus) system, mmaping is faster than sequentially reading.
+const MMAP_THRESHOLD: u64 = 16 * 4096;
+
+/// A `BufferedReader` implementation for files.
+///
+/// This implementation tries to mmap the file, falling back to
+/// just using a generic reader.
+pub struct BufferedReaderFile<'a, C>(Imp<'a, C>);
+
+impl<'a, C> fmt::Debug for BufferedReaderFile<'a, C> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_tuple("BufferedReaderFile")
+            .field(&self.0)
+            .finish()
+    }
+}
+
+/// The implementation.
+///
+/// Either a generic reader over the open file, or a reader over a
+/// read-only memory mapping of it.
+enum Imp<'a, C> {
+    /// Fallback: the file is read through a `BufferedReaderGeneric`.
+    Generic(BufferedReaderGeneric<File, C>),
+    /// The file is mapped into memory.
+    MMAP {
+        /// Start of the mapping, as returned by `mmap`.
+        addr: *mut c_void,
+        /// Length of the mapping in bytes.
+        length: size_t,
+        /// Reader over the mapped bytes.
+        reader: BufferedReaderMemory<'a, C>,
+    }
+}
+
+impl<'a, C> Drop for Imp<'a, C> {
+    /// Unmaps the file if it was mapped.
+    fn drop(&mut self) {
+        match self {
+            Imp::Generic(_) => (),
+            Imp::MMAP { addr, length, .. } =>
+                // SAFETY: `addr` and `length` describe the mapping
+                // created by `mmap` in `with_cookie`, and this is
+                // the only place where it is unmapped.
+                unsafe {
+                    munmap(*addr, *length);
+                },
+        }
+    }
+}
+
+impl<'a, C> fmt::Debug for Imp<'a, C> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            Imp::Generic(ref g) =>
+                f.debug_tuple("Generic")
+                .field(&g)
+                .finish(),
+            Imp::MMAP { ref addr, ref length, ref reader } =>
+                f.debug_struct("MMAP")
+                .field("addr", addr)
+                .field("length", length)
+                .field("reader", reader)
+                .finish(),
+        }
+    }
+}
+
+impl<'a> BufferedReaderFile<'a, ()> {
+    /// Opens the given file.
+    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
+        Self::with_cookie(path, ())
+    }
+}
+
+impl<'a, C> BufferedReaderFile<'a, C> {
+    /// Like `open()`, but sets a cookie.
+    ///
+    /// Falls back to a generic reader if `SEQUOIA_DONT_MMAP` is set,
+    /// if the file is smaller than `MMAP_THRESHOLD`, if its length
+    /// does not fit into a `usize`, or if `mmap` fails.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the file cannot be opened or if its
+    /// metadata cannot be read.
+    pub fn with_cookie<P: AsRef<Path>>(path: P, cookie: C) -> io::Result<Self> {
+        // As fallback, we use a generic reader.
+        let generic = |file, cookie| {
+            Ok(BufferedReaderFile(
+                Imp::Generic(
+                    BufferedReaderGeneric::with_cookie(file, None, cookie))))
+        };
+
+        let file = File::open(path)?;
+
+        // For testing and benchmarking purposes, we use the variable
+        // SEQUOIA_DONT_MMAP to turn off mmapping.
+        if ::std::env::var_os("SEQUOIA_DONT_MMAP").is_some() {
+            return generic(file, cookie);
+        }
+
+        let length = file.metadata()?.len();
+
+        // For small files, the overhead of manipulating the page
+        // table is not worth the gain.
+        if length < MMAP_THRESHOLD {
+            return generic(file, cookie);
+        }
+
+        // Be nice to 32 bit systems.
+        if length > usize::max_value() as u64 {
+            return generic(file, cookie);
+        }
+        let length = length as usize;
+
+        let fd = file.as_raw_fd();
+        // SAFETY: `fd` belongs to `file`, which stays open for the
+        // duration of the call.  We ask for a new private, read-only
+        // mapping and check the result below.
+        let addr = unsafe {
+            mmap(ptr::null_mut(), length, PROT_READ, MAP_PRIVATE,
+                 fd, 0)
+        };
+        // On failure, mmap returns MAP_FAILED, not a null pointer.
+        if addr == MAP_FAILED {
+            return generic(file, cookie);
+        }
+
+        // SAFETY: mmap succeeded, so `addr` is the start of a
+        // mapping of `length` bytes.  The mapping is only released
+        // when the `Imp::MMAP` that owns the reader is dropped.
+        let slice = unsafe {
+            slice::from_raw_parts(addr as *const u8, length)
+        };
+
+        Ok(BufferedReaderFile(
+            Imp::MMAP {
+                addr: addr,
+                length: length,
+                reader: BufferedReaderMemory::with_cookie(slice, cookie),
+            }
+        ))
+    }
+}
+
+impl<'a, C> io::Read for BufferedReaderFile<'a, C> {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.read(buf),
+            Imp::MMAP { ref mut reader, .. } => reader.read(buf),
+        }
+    }
+}
+
+// All data and cookie methods forward to the active reader.
+impl<'a, C> BufferedReader<C> for BufferedReaderFile<'a, C> {
+    fn buffer(&self) -> &[u8] {
+        match self.0 {
+            Imp::Generic(ref reader) => reader.buffer(),
+            Imp::MMAP { ref reader, .. } => reader.buffer(),
+        }
+    }
+
+    fn data(&mut self, amount: usize) -> io::Result<&[u8]> {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.data(amount),
+            Imp::MMAP { ref mut reader, .. } => reader.data(amount),
+        }
+    }
+
+    fn data_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.data_hard(amount),
+            Imp::MMAP { ref mut reader, .. } => reader.data_hard(amount),
+        }
+    }
+
+    fn consume(&mut self, amount: usize) -> &[u8] {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.consume(amount),
+            Imp::MMAP { ref mut reader, .. } => reader.consume(amount),
+        }
+    }
+
+    fn data_consume(&mut self, amount: usize) -> io::Result<&[u8]> {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.data_consume(amount),
+            Imp::MMAP { ref mut reader, .. } => reader.data_consume(amount),
+        }
+    }
+
+    fn data_consume_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.data_consume_hard(amount),
+            Imp::MMAP { ref mut reader, .. } => reader.data_consume_hard(amount),
+        }
+    }
+
+    fn get_mut(&mut self) -> Option<&mut BufferedReader<C>> {
+        None
+    }
+
+    fn get_ref(&self) -> Option<&BufferedReader<C>> {
+        None
+    }
+
+    fn into_inner<'b>(self: Box<Self>) -> Option<Box<BufferedReader<C> + 'b>>
+        where Self: 'b {
+        None
+    }
+
+    fn cookie_set(&mut self, cookie: C) -> C {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.cookie_set(cookie),
+            Imp::MMAP { ref mut reader, .. } => reader.cookie_set(cookie),
+        }
+    }
+
+    fn cookie_ref(&self) -> &C {
+        match self.0 {
+            Imp::Generic(ref reader) => reader.cookie_ref(),
+            Imp::MMAP { ref reader, .. } => reader.cookie_ref(),
+        }
+    }
+
+    fn cookie_mut(&mut self) -> &mut C {
+        match self.0 {
+            Imp::Generic(ref mut reader) => reader.cookie_mut(),
+            Imp::MMAP { ref mut reader, .. } => reader.cookie_mut(),
+        }
+    }
+}
diff --git a/buffered-reader/src/lib.rs b/buffered-reader/src/lib.rs
index 8008ff23..4eb0f419 100644
--- a/buffered-reader/src/lib.rs
+++ b/buffered-reader/src/lib.rs
@@ -4,6 +4,7 @@
 extern crate flate2;
 #[cfg(feature = "compression-bzip2")]
 extern crate bzip2;
+extern crate libc;
 
 use std::io;
 use std::io::{Error, ErrorKind};
@@ -32,8 +33,18 @@ pub use self::decompress_deflate::BufferedReaderZlib;
 #[cfg(feature = "compression-bzip2")]
 pub use self::decompress_bzip2::BufferedReaderBzip;
 
+// These are the different BufferedReaderFile implementations.  We
+// include the modules unconditionally, so that we catch bitrot early.
+#[allow(dead_code)]
 mod file_generic;
+#[allow(dead_code)]
+mod file_unix;
+
+// Then, we select the appropriate version to re-export.
+#[cfg(not(unix))]
 pub use self::file_generic::BufferedReaderFile;
+#[cfg(unix)]
+pub use self::file_unix::BufferedReaderFile;
 
 // The default buffer size.
 const DEFAULT_BUF_SIZE: usize = 8 * 1024;
-- 
cgit v1.2.3