summaryrefslogtreecommitdiffstats
path: root/buffered-reader/src
diff options
context:
space:
mode:
authorJustus Winter <justus@sequoia-pgp.org>2018-09-10 14:30:57 +0200
committerJustus Winter <justus@sequoia-pgp.org>2018-09-17 14:44:34 +0200
commit6397b048cf6943f38aefb97c1c4199ad744ca978 (patch)
tree93aeaec95d5c14a70919c6f1a6e31724e063a04b /buffered-reader/src
parent030b195820c82d8fd89feefa5034e966446f4546 (diff)
buffered-reader: Add mmapping BufferedReaderFile variant.
- Fixes #98.
Diffstat (limited to 'buffered-reader/src')
-rw-r--r--buffered-reader/src/file_unix.rs223
-rw-r--r--buffered-reader/src/lib.rs11
2 files changed, 234 insertions, 0 deletions
diff --git a/buffered-reader/src/file_unix.rs b/buffered-reader/src/file_unix.rs
new file mode 100644
index 00000000..77c27d77
--- /dev/null
+++ b/buffered-reader/src/file_unix.rs
@@ -0,0 +1,223 @@
+//! A mmapping `BufferedReader` implementation for files.
+//!
+//! On my (Justus) system, this implementation improves the
+//! performance of the statistics example by ~10% over the
+//! BufferedReaderGeneric.
+
+use libc::{c_void, size_t, mmap, munmap, PROT_READ, MAP_PRIVATE};
+use std::fmt;
+use std::fs::File;
+use std::io;
+use std::os::unix::io::AsRawFd;
+use std::slice;
+use std::path::Path;
+use std::ptr;
+
+use super::*;
+
+// For small files, the overhead of manipulating the page table is not
+// worth the gain. This threshold has been chosen so that on my
+// (Justus) system, mmaping is faster than sequentially reading.
+const MMAP_THRESHOLD: u64 = 16 * 4096;
+
+/// A `BufferedReader` implementation for files.
+///
+/// This implementation tries to mmap the file, falling back to
+/// just using a generic reader.
+pub struct BufferedReaderFile<'a, C>(Imp<'a, C>);
+
+impl<'a, C> fmt::Debug for BufferedReaderFile<'a, C> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.debug_tuple("BufferedReaderFile")
+ .field(&self.0)
+ .finish()
+ }
+}
+
+/// The implementation.
+enum Imp<'a, C> {
+ Generic(BufferedReaderGeneric<File, C>),
+ MMAP {
+ addr: *mut c_void,
+ length: size_t,
+ reader: BufferedReaderMemory<'a, C>,
+ }
+}
+
+impl<'a, C> Drop for Imp<'a, C> {
+ fn drop(&mut self) {
+ match self {
+ Imp::Generic(_) => (),
+ Imp::MMAP { addr, length, .. } =>
+ unsafe {
+ munmap(*addr, *length);
+ },
+ }
+ }
+}
+
+impl<'a, C> fmt::Debug for Imp<'a, C> {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ match self {
+ Imp::Generic(ref g) =>
+ f.debug_tuple("Generic")
+ .field(&g)
+ .finish(),
+ Imp::MMAP { ref addr, ref length, ref reader } =>
+ f.debug_struct("MMAP")
+ .field("addr", addr)
+ .field("length", length)
+ .field("reader", reader)
+ .finish(),
+ }
+ }
+}
+
+impl<'a> BufferedReaderFile<'a, ()> {
+ /// Opens the given file.
+ pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
+ Self::with_cookie(path, ())
+ }
+}
+
+impl<'a, C> BufferedReaderFile<'a, C> {
+ /// Like `open()`, but sets a cookie.
+ pub fn with_cookie<P: AsRef<Path>>(path: P, cookie: C) -> io::Result<Self> {
+ // As fallback, we use a generic reader.
+ let generic = |file, cookie| {
+ Ok(BufferedReaderFile(
+ Imp::Generic(
+ BufferedReaderGeneric::with_cookie(file, None, cookie))))
+ };
+
+ let file = File::open(path)?;
+
+ // For testing and benchmarking purposes, we use the variable
+ // SEQUOIA_DONT_MMAP to turn off mmapping.
+ if ::std::env::var_os("SEQUOIA_DONT_MMAP").is_some() {
+ return generic(file, cookie);
+ }
+
+ let length = file.metadata()?.len();
+
+ // For small files, the overhead of manipulating the page
+ // table is not worth the gain.
+ if length < MMAP_THRESHOLD {
+ return generic(file, cookie);
+ }
+
+ // Be nice to 32 bit systems.
+ if length > usize::max_value() as u64 {
+ return generic(file, cookie);
+ }
+ let length = length as usize;
+
+ let fd = file.as_raw_fd();
+ let addr = unsafe {
+ mmap(ptr::null_mut(), length, PROT_READ, MAP_PRIVATE,
+ fd, 0)
+ };
+ if addr.is_null() {
+ return generic(file, cookie);
+ }
+
+ let slice = unsafe {
+ slice::from_raw_parts(addr as *const u8, length)
+ };
+
+ Ok(BufferedReaderFile(
+ Imp::MMAP {
+ addr: addr,
+ length: length,
+ reader: BufferedReaderMemory::with_cookie(slice, cookie),
+ }
+ ))
+ }
+}
+
+impl<'a, C> io::Read for BufferedReaderFile<'a, C> {
+ fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.read(buf),
+ Imp::MMAP { ref mut reader, .. } => reader.read(buf),
+ }
+ }
+}
+
+impl<'a, C> BufferedReader<C> for BufferedReaderFile<'a, C> {
+ fn buffer(&self) -> &[u8] {
+ match self.0 {
+ Imp::Generic(ref reader) => reader.buffer(),
+ Imp::MMAP { ref reader, .. } => reader.buffer(),
+ }
+ }
+
+ fn data(&mut self, amount: usize) -> io::Result<&[u8]> {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.data(amount),
+ Imp::MMAP { ref mut reader, .. } => reader.data(amount),
+ }
+ }
+
+ fn data_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.data_hard(amount),
+ Imp::MMAP { ref mut reader, .. } => reader.data_hard(amount),
+ }
+ }
+
+ fn consume(&mut self, amount: usize) -> &[u8] {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.consume(amount),
+ Imp::MMAP { ref mut reader, .. } => reader.consume(amount),
+ }
+ }
+
+ fn data_consume(&mut self, amount: usize) -> io::Result<&[u8]> {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.data_consume(amount),
+ Imp::MMAP { ref mut reader, .. } => reader.data_consume(amount),
+ }
+ }
+
+ fn data_consume_hard(&mut self, amount: usize) -> io::Result<&[u8]> {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.data_consume_hard(amount),
+ Imp::MMAP { ref mut reader, .. } => reader.data_consume_hard(amount),
+ }
+ }
+
+ fn get_mut(&mut self) -> Option<&mut BufferedReader<C>> {
+ None
+ }
+
+ fn get_ref(&self) -> Option<&BufferedReader<C>> {
+ None
+ }
+
+ fn into_inner<'b>(self: Box<Self>) -> Option<Box<BufferedReader<C> + 'b>>
+ where Self: 'b {
+ None
+ }
+
+ fn cookie_set(&mut self, cookie: C) -> C {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.cookie_set(cookie),
+ Imp::MMAP { ref mut reader, .. } => reader.cookie_set(cookie),
+ }
+ }
+
+ fn cookie_ref(&self) -> &C {
+ match self.0 {
+ Imp::Generic(ref reader) => reader.cookie_ref(),
+ Imp::MMAP { ref reader, .. } => reader.cookie_ref(),
+ }
+ }
+
+ fn cookie_mut(&mut self) -> &mut C {
+ match self.0 {
+ Imp::Generic(ref mut reader) => reader.cookie_mut(),
+ Imp::MMAP { ref mut reader, .. } => reader.cookie_mut(),
+ }
+ }
+}
diff --git a/buffered-reader/src/lib.rs b/buffered-reader/src/lib.rs
index 8008ff23..4eb0f419 100644
--- a/buffered-reader/src/lib.rs
+++ b/buffered-reader/src/lib.rs
@@ -4,6 +4,7 @@
extern crate flate2;
#[cfg(feature = "compression-bzip2")]
extern crate bzip2;
+extern crate libc;
use std::io;
use std::io::{Error, ErrorKind};
@@ -32,8 +33,18 @@ pub use self::decompress_deflate::BufferedReaderZlib;
#[cfg(feature = "compression-bzip2")]
pub use self::decompress_bzip2::BufferedReaderBzip;
+// These are the different BufferedReaderFile implementations. We
+// include the modules unconditionally, so that we catch bitrot early.
+#[allow(dead_code)]
mod file_generic;
+#[allow(dead_code)]
+mod file_unix;
+
+// Then, we select the appropriate version to re-export.
+#[cfg(not(unix))]
pub use self::file_generic::BufferedReaderFile;
+#[cfg(unix)]
+pub use self::file_unix::BufferedReaderFile;
// The default buffer size.
const DEFAULT_BUF_SIZE: usize = 8 * 1024;