summaryrefslogtreecommitdiffstats
path: root/buffered-reader
diff options
context:
space:
mode:
authorNeal H. Walfield <neal@pep.foundation>2018-02-23 12:55:14 +0100
committerNeal H. Walfield <neal@pep.foundation>2018-02-25 20:29:29 +0100
commitfd71e3741afadbb7d8e3de655fb1d35edea8c9af (patch)
tree7a2a1540c6cd71db75a8915b67b75b786bb7c48a /buffered-reader
parente231c3e62e7724203595b484a2cf8a7357cb9d84 (diff)
buffered-reader: Provide a method to get the internal buffer.
- data() returns the internal buffer, but requires a mutable reference to the reader. The new function, buffer(), returns the internal buffer through a shared (immutable) reference.
Diffstat (limited to 'buffered-reader')
-rw-r--r--buffered-reader/src/decompress.rs79
-rw-r--r--buffered-reader/src/generic.rs96
-rw-r--r--buffered-reader/src/lib.rs16
-rw-r--r--buffered-reader/src/limitor.rs196
-rw-r--r--buffered-reader/src/memory.rs50
5 files changed, 346 insertions, 91 deletions
diff --git a/buffered-reader/src/decompress.rs b/buffered-reader/src/decompress.rs
index 81a11e48..6ed2f8c6 100644
--- a/buffered-reader/src/decompress.rs
+++ b/buffered-reader/src/decompress.rs
@@ -47,6 +47,10 @@ impl <R: BufferedReader<C>, C> fmt::Debug for BufferedReaderDeflate<R, C> {
impl<R: BufferedReader<C>, C> BufferedReader<C>
for BufferedReaderDeflate<R, C> {
+ fn buffer(&self) -> &[u8] {
+ return self.reader.buffer();
+ }
+
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
return self.reader.data(amount);
}
@@ -155,6 +159,10 @@ impl <R: BufferedReader<C>, C> fmt::Debug for BufferedReaderZlib<R, C> {
impl<R: BufferedReader<C>, C> BufferedReader<C>
for BufferedReaderZlib<R, C> {
+ fn buffer(&self) -> &[u8] {
+ return self.reader.buffer();
+ }
+
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
return self.reader.data(amount);
}
@@ -262,6 +270,10 @@ impl <R: BufferedReader<C>, C> fmt::Debug for BufferedReaderBzip<R, C> {
}
impl<R: BufferedReader<C>, C> BufferedReader<C> for BufferedReaderBzip<R, C> {
+ fn buffer(&self) -> &[u8] {
+ return self.reader.buffer();
+ }
+
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
return self.reader.data(amount);
}
@@ -330,3 +342,70 @@ impl<R: BufferedReader<C>, C> BufferedReader<C> for BufferedReaderBzip<R, C> {
}
}
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ // Test that buffer() returns the same data as data().
+ #[test]
+ fn buffer_test() {
+ use flate2::write::DeflateEncoder;
+ use flate2::Compression;
+ use std::io::prelude::*;
+
+ // Test vector.
+ let size = 10 * DEFAULT_BUF_SIZE;
+ let mut input_raw = Vec::with_capacity(size);
+ let mut v = 0u8;
+ for _ in 0..size {
+ input_raw.push(v);
+ if v == std::u8::MAX {
+ v = 0;
+ } else {
+ v += 1;
+ }
+ }
+
+ // Compress the raw input (write_all(): a bare write() may be partial).
+ let mut input = Vec::new();
+ {
+ let mut encoder =
+ DeflateEncoder::new(&mut input, Compression::default());
+ encoder.write_all(&input_raw[..]).unwrap();
+ encoder.try_finish().unwrap();
+ }
+
+ let mut reader = BufferedReaderDeflate::new(
+ BufferedReaderGeneric::new(&input[..], None));
+
+ // Gather some stats to make it easier to figure out whether
+ // this test is working.
+ let stats_count = 2 * DEFAULT_BUF_SIZE;
+ let mut stats = vec![0usize; stats_count];
+
+ for i in 0..input_raw.len() {
+ let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec();
+ assert!(data.len() > 0);
+ assert_eq!(data, reader.buffer());
+ // And, we may as well check to make sure we read the
+ // right data.
+ assert_eq!(data, &input_raw[i..i+data.len()]);
+
+ stats[cmp::min(data.len(), stats_count - 1)] += 1;
+
+ // Consume one byte and see what happens.
+ reader.consume(1);
+ }
+
+ if false {
+ for i in 0..stats.len() {
+ if stats[i] > 0 {
+ if i == stats.len() - 1 {
+ eprint!(">=");
+ }
+ eprintln!("{}: {}", i, stats[i]);
+ }
+ }
+ }
+ }
+}
diff --git a/buffered-reader/src/generic.rs b/buffered-reader/src/generic.rs
index 81469efd..f2f11cd6 100644
--- a/buffered-reader/src/generic.rs
+++ b/buffered-reader/src/generic.rs
@@ -193,6 +193,14 @@ impl<T: io::Read, C> io::Read for BufferedReaderGeneric<T, C> {
}
impl<T: io::Read, C> BufferedReader<C> for BufferedReaderGeneric<T, C> {
+ fn buffer(&self) -> &[u8] {
+ if let Some(ref buffer) = self.buffer {
+ &buffer[self.cursor..]
+ } else {
+ &b""[..]
+ }
+ }
+
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
return self.data_helper(amount, false, false);
}
@@ -260,27 +268,81 @@ impl<T: io::Read, C> BufferedReader<C> for BufferedReaderGeneric<T, C> {
}
}
-#[test]
-fn buffered_reader_generic_test() {
- // Test reading from a file.
- {
- use std::path::PathBuf;
- use std::fs::File;
+#[cfg(test)]
+mod test {
+ use super::*;
+
+ #[test]
+ fn buffered_reader_generic_test() {
+ // Test reading from a file.
+ {
+ use std::path::PathBuf;
+ use std::fs::File;
- let path : PathBuf = [env!("CARGO_MANIFEST_DIR"),
- "src", "buffered-reader-test.txt"]
- .iter().collect();
- let mut f = File::open(&path).expect(&path.to_string_lossy());
- let mut bio = BufferedReaderGeneric::new(&mut f, None);
+ let path : PathBuf = [env!("CARGO_MANIFEST_DIR"),
+ "src", "buffered-reader-test.txt"]
+ .iter().collect();
+ let mut f = File::open(&path).expect(&path.to_string_lossy());
+ let mut bio = BufferedReaderGeneric::new(&mut f, None);
- buffered_reader_test_data_check(&mut bio);
+ buffered_reader_test_data_check(&mut bio);
+ }
+
+ // Same test, but as a slice.
+ {
+ let mut data : &[u8] = include_bytes!("buffered-reader-test.txt");
+ let mut bio = BufferedReaderGeneric::new(&mut data, None);
+
+ buffered_reader_test_data_check(&mut bio);
+ }
}
- // Same test, but as a slice.
- {
- let mut data : &[u8] = include_bytes!("buffered-reader-test.txt");
- let mut bio = BufferedReaderGeneric::new(&mut data, None);
+ // Test that buffer() returns the same data as data().
+ #[test]
+ fn buffer_test() {
+ // Test vector.
+ let size = 10 * DEFAULT_BUF_SIZE;
+ let mut input = Vec::with_capacity(size);
+ let mut v = 0u8;
+ for _ in 0..size {
+ input.push(v);
+ if v == std::u8::MAX {
+ v = 0;
+ } else {
+ v += 1;
+ }
+ }
+
+ let mut reader = BufferedReaderGeneric::new(&input[..], None);
+
+ // Gather some stats to make it easier to figure out whether
+ // this test is working.
+ let stats_count = 2 * DEFAULT_BUF_SIZE;
+ let mut stats = vec![0usize; stats_count];
+
+ for i in 0..input.len() {
+ let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec();
+ assert!(data.len() > 0);
+ assert_eq!(data, reader.buffer());
+ // And, we may as well check to make sure we read the
+ // right data.
+ assert_eq!(data, &input[i..i+data.len()]);
+
+ stats[cmp::min(data.len(), stats_count - 1)] += 1;
- buffered_reader_test_data_check(&mut bio);
+ // Consume one byte and see what happens.
+ reader.consume(1);
+ }
+
+ if false {
+ for i in 0..stats.len() {
+ if stats[i] > 0 {
+ if i == stats.len() - 1 {
+ eprint!(">=");
+ }
+ eprintln!("{}: {}", i, stats[i]);
+ }
+ }
+ }
}
}
diff --git a/buffered-reader/src/lib.rs b/buffered-reader/src/lib.rs
index 82cab946..aef5c897 100644
--- a/buffered-reader/src/lib.rs
+++ b/buffered-reader/src/lib.rs
@@ -32,6 +32,12 @@ const DEFAULT_BUF_SIZE: usize = 8 * 1024;
/// `BufferedReader` allows the caller to ensure that the internal
/// buffer has a certain amount of data.
pub trait BufferedReader<C> : io::Read + fmt::Debug {
+ /// Returns a reference to the internal buffer.
+ ///
+ /// Note: this will return the same data as self.data(0), but it
+ /// does so without mutably borrowing self.
+ fn buffer(&self) -> &[u8];
+
/// Return the data in the internal buffer. Normally, the
/// returned buffer will contain *at least* `amount` bytes worth
/// of data. Less data may be returned if (and only if) the end
@@ -112,8 +118,8 @@ pub trait BufferedReader<C> : io::Read + fmt::Debug {
-> Result<&[u8], std::io::Error>;
- // This is a convenient function that effectively combines
- // data_hard() and consume().
+ /// This is a convenience function that effectively combines
+ /// data_hard() and consume().
fn data_consume_hard(&mut self, amount: usize) -> Result<&[u8], io::Error>;
/// A convenience function for reading a 16-bit unsigned integer
@@ -132,7 +138,7 @@ pub trait BufferedReader<C> : io::Read + fmt::Debug {
}
/// Reads and consumes `amount` bytes, and returns them in a
- /// caller owned buffer. Implementations may optimize this to
+ /// caller-owned buffer. Implementations may optimize this to
/// avoid a copy.
fn steal(&mut self, amount: usize) -> Result<Vec<u8>, std::io::Error> {
let mut data = self.data_consume_hard(amount)?;
@@ -230,6 +236,10 @@ pub fn buffered_reader_generic_read_impl<T: BufferedReader<C>, C>
/// Make a `Box<BufferedReader>` look like a BufferedReader.
impl <'a, C> BufferedReader<C> for Box<BufferedReader<C> + 'a> {
+ fn buffer(&self) -> &[u8] {
+ return self.as_ref().buffer();
+ }
+
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
return self.as_mut().data(amount);
}
diff --git a/buffered-reader/src/limitor.rs b/buffered-reader/src/limitor.rs
index 0bce61f9..ee8dc853 100644
--- a/buffered-reader/src/limitor.rs
+++ b/buffered-reader/src/limitor.rs
@@ -52,6 +52,13 @@ impl<T: BufferedReader<C>, C> io::Read for BufferedReaderLimitor<T, C> {
}
impl<T: BufferedReader<C>, C> BufferedReader<C> for BufferedReaderLimitor<T, C> {
+ fn buffer(&self) -> &[u8] {
+ let buf = self.reader.buffer();
+ &buf[..cmp::min(buf.len(),
+ cmp::min(std::usize::MAX as u64,
+ self.limit) as usize)]
+ }
+
/// Return the buffer. Ensure that it contains at least `amount`
/// bytes.
fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> {
@@ -127,86 +134,143 @@ impl<T: BufferedReader<C>, C> BufferedReader<C> for BufferedReaderLimitor<T, C>
}
}
-#[test]
-fn buffered_reader_limitor_test() {
- let data : &[u8] = b"01234567890123456789";
+#[cfg(test)]
+mod test {
+ use super::*;
- /* Add a single limitor. */
- {
- let mut bio : Box<BufferedReader<()>>
- = Box::new(BufferedReaderMemory::new(data));
+ #[test]
+ fn buffered_reader_limitor_test() {
+ let data : &[u8] = b"01234567890123456789";
- bio = {
- let mut bio2 = Box::new(BufferedReaderLimitor::new(bio, 5));
- {
- let result = bio2.data(5).unwrap();
- assert_eq!(result.len(), 5);
- assert_eq!(result, &b"01234"[..]);
- }
- bio2.consume(5);
- {
- let result = bio2.data(1).unwrap();
- assert_eq!(result.len(), 0);
- assert_eq!(result, &b""[..]);
- }
+ /* Add a single limitor. */
+ {
+ let mut bio : Box<BufferedReader<()>>
+ = Box::new(BufferedReaderMemory::new(data));
- bio2.into_inner().unwrap()
- };
+ bio = {
+ let mut bio2 = Box::new(BufferedReaderLimitor::new(bio, 5));
+ {
+ let result = bio2.data(5).unwrap();
+ assert_eq!(result.len(), 5);
+ assert_eq!(result, &b"01234"[..]);
+ }
+ bio2.consume(5);
+ {
+ let result = bio2.data(1).unwrap();
+ assert_eq!(result.len(), 0);
+ assert_eq!(result, &b""[..]);
+ }
+
+ bio2.into_inner().unwrap()
+ };
- {
- {
- let result = bio.data(15).unwrap();
- assert_eq!(result.len(), 15);
- assert_eq!(result, &b"567890123456789"[..]);
- }
- bio.consume(15);
{
- let result = bio.data(1).unwrap();
- assert_eq!(result.len(), 0);
- assert_eq!(result, &b""[..]);
+ {
+ let result = bio.data(15).unwrap();
+ assert_eq!(result.len(), 15);
+ assert_eq!(result, &b"567890123456789"[..]);
+ }
+ bio.consume(15);
+ {
+ let result = bio.data(1).unwrap();
+ assert_eq!(result.len(), 0);
+ assert_eq!(result, &b""[..]);
+ }
}
}
- }
- /* Try with two limitors where the first one imposes the real
- * limit. */
- {
- let mut bio : Box<BufferedReader<()>>
- = Box::new(BufferedReaderMemory::new(data));
+ /* Try with two limitors where the first one imposes the real
+ * limit. */
+ {
+ let mut bio : Box<BufferedReader<()>>
+ = Box::new(BufferedReaderMemory::new(data));
+
+ bio = {
+ let bio2 : Box<BufferedReader<()>>
+ = Box::new(BufferedReaderLimitor::new(bio, 5));
+ // We limit to 15 bytes, but bio2 will still limit us to 5
+ // bytes.
+ let mut bio3 : Box<BufferedReader<()>>
+ = Box::new(BufferedReaderLimitor::new(bio2, 15));
+ {
+ let result = bio3.data(100).unwrap();
+ assert_eq!(result.len(), 5);
+ assert_eq!(result, &b"01234"[..]);
+ }
+ bio3.consume(5);
+ {
+ let result = bio3.data(1).unwrap();
+ assert_eq!(result.len(), 0);
+ assert_eq!(result, &b""[..]);
+ }
+
+ bio3.into_inner().unwrap().into_inner().unwrap()
+ };
- bio = {
- let bio2 : Box<BufferedReader<()>>
- = Box::new(BufferedReaderLimitor::new(bio, 5));
- // We limit to 15 bytes, but bio2 will still limit us to 5
- // bytes.
- let mut bio3 : Box<BufferedReader<()>>
- = Box::new(BufferedReaderLimitor::new(bio2, 15));
{
- let result = bio3.data(100).unwrap();
- assert_eq!(result.len(), 5);
- assert_eq!(result, &b"01234"[..]);
+ {
+ let result = bio.data(15).unwrap();
+ assert_eq!(result.len(), 15);
+ assert_eq!(result, &b"567890123456789"[..]);
+ }
+ bio.consume(15);
+ {
+ let result = bio.data(1).unwrap();
+ assert_eq!(result.len(), 0);
+ assert_eq!(result, &b""[..]);
+ }
}
- bio3.consume(5);
- {
- let result = bio3.data(1).unwrap();
- assert_eq!(result.len(), 0);
- assert_eq!(result, &b""[..]);
+ }
+ }
+
+ // Test that buffer() returns the same data as data().
+ #[test]
+ fn buffer_test() {
+ // Test vector.
+ let size = 10 * DEFAULT_BUF_SIZE;
+ let mut input = Vec::with_capacity(size);
+ let mut v = 0u8;
+ for _ in 0..size {
+ input.push(v);
+ if v == std::u8::MAX {
+ v = 0;
+ } else {
+ v += 1;
}
+ }
- bio3.into_inner().unwrap().into_inner().unwrap()
- };
+ let reader = BufferedReaderGeneric::new(&input[..], None);
+ let size = size / 2;
+ let input = &input[..size];
+ let mut reader = BufferedReaderLimitor::new(reader, input.len() as u64);
- {
- {
- let result = bio.data(15).unwrap();
- assert_eq!(result.len(), 15);
- assert_eq!(result, &b"567890123456789"[..]);
- }
- bio.consume(15);
- {
- let result = bio.data(1).unwrap();
- assert_eq!(result.len(), 0);
- assert_eq!(result, &b""[..]);
+ // Gather some stats to make it easier to figure out whether
+ // this test is working.
+ let stats_count = 2 * DEFAULT_BUF_SIZE;
+ let mut stats = vec![0usize; stats_count];
+
+ for i in 0..input.len() {
+ let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec();
+ assert!(data.len() > 0);
+ assert_eq!(data, reader.buffer());
+ // And, we may as well check to make sure we read the
+ // right data.
+ assert_eq!(data, &input[i..i+data.len()]);
+
+ stats[cmp::min(data.len(), stats_count - 1)] += 1;
+
+ // Consume one byte and see what happens.
+ reader.consume(1);
+ }
+
+ if false {
+ for i in 0..stats.len() {
+ if stats[i] > 0 {
+ if i == stats.len() - 1 {
+ eprint!(">=");
+ }
+ eprintln!("{}: {}", i, stats[i]);
+ }
}
}
}
diff --git a/buffered-reader/src/memory.rs b/buffered-reader/src/memory.rs
index 5bbc2e13..83de83a8 100644
--- a/buffered-reader/src/memory.rs
+++ b/buffered-reader/src/memory.rs
@@ -63,6 +63,10 @@ impl<'a, C> io::Read for BufferedReaderMemory<'a, C> {
}
impl<'a, C> BufferedReader<C> for BufferedReaderMemory<'a, C> {
+ fn buffer(&self) -> &[u8] {
+ &self.buffer[self.cursor..]
+ }
+
/// Return the buffer. Ensure that it contains at least `amount`
/// bytes.
fn data(&mut self, _amount: usize) -> Result<&[u8], io::Error> {
@@ -120,10 +124,46 @@ impl<'a, C> BufferedReader<C> for BufferedReaderMemory<'a, C> {
}
}
-#[test]
-fn buffered_reader_memory_test () {
- let data : &[u8] = include_bytes!("buffered-reader-test.txt");
- let mut bio = BufferedReaderMemory::new(data);
+#[cfg(test)]
+mod test {
+ use super::*;
+ #[test]
+ fn buffered_reader_memory_test () {
+ let data : &[u8] = include_bytes!("buffered-reader-test.txt");
+ let mut bio = BufferedReaderMemory::new(data);
+
+ buffered_reader_test_data_check(&mut bio);
+ }
+
+ // Test that buffer() returns the same data as data().
+ #[test]
+ fn buffer_test() {
+ // Test vector. A BufferedReaderMemory returns all unconsumed
+ // data. So, use a relatively small buffer size.
+ let size = DEFAULT_BUF_SIZE;
+ let mut input = Vec::with_capacity(size);
+ let mut v = 0u8;
+ for _ in 0..size {
+ input.push(v);
+ if v == std::u8::MAX {
+ v = 0;
+ } else {
+ v += 1;
+ }
+ }
+
+ let mut reader = BufferedReaderMemory::new(&input[..]);
+
+ for i in 0..input.len() {
+ let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec();
+ assert!(data.len() > 0);
+ assert_eq!(data, reader.buffer());
+ // And, we may as well check to make sure we read the
+ // right data.
+ assert_eq!(data, &input[i..i+data.len()]);
- buffered_reader_test_data_check(&mut bio);
+ // Consume one byte and see what happens.
+ reader.consume(1);
+ }
+ }
}