From fd71e3741afadbb7d8e3de655fb1d35edea8c9af Mon Sep 17 00:00:00 2001 From: "Neal H. Walfield" Date: Fri, 23 Feb 2018 12:55:14 +0100 Subject: buffered-reader: Provide a method to get the internal buffer. - data() returns the internal buffer, but needs a mutable reference. This new function, buffer(), returns the internal buffer with a normal reference. --- buffered-reader/src/decompress.rs | 79 +++++++++++++++ buffered-reader/src/generic.rs | 96 +++++++++++++++---- buffered-reader/src/lib.rs | 16 +++- buffered-reader/src/limitor.rs | 196 +++++++++++++++++++++++++------------- buffered-reader/src/memory.rs | 50 +++++++++- 5 files changed, 346 insertions(+), 91 deletions(-) (limited to 'buffered-reader') diff --git a/buffered-reader/src/decompress.rs b/buffered-reader/src/decompress.rs index 81a11e48..6ed2f8c6 100644 --- a/buffered-reader/src/decompress.rs +++ b/buffered-reader/src/decompress.rs @@ -47,6 +47,10 @@ impl , C> fmt::Debug for BufferedReaderDeflate { impl, C> BufferedReader for BufferedReaderDeflate { + fn buffer(&self) -> &[u8] { + return self.reader.buffer(); + } + fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { return self.reader.data(amount); } @@ -155,6 +159,10 @@ impl , C> fmt::Debug for BufferedReaderZlib { impl, C> BufferedReader for BufferedReaderZlib { + fn buffer(&self) -> &[u8] { + return self.reader.buffer(); + } + fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { return self.reader.data(amount); } @@ -262,6 +270,10 @@ impl , C> fmt::Debug for BufferedReaderBzip { } impl, C> BufferedReader for BufferedReaderBzip { + fn buffer(&self) -> &[u8] { + return self.reader.buffer(); + } + fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { return self.reader.data(amount); } @@ -330,3 +342,70 @@ impl, C> BufferedReader for BufferedReaderBzip { } } +#[cfg(test)] +mod test { + use super::*; + + // Test that buffer() returns the same data as data(). + #[test] + fn buffer_test() { + use flate2::write::DeflateEncoder; + use flate2::Compression; + use std::io::prelude::*; + + // Test vector. + let size = 10 * DEFAULT_BUF_SIZE; + let mut input_raw = Vec::with_capacity(size); + let mut v = 0u8; + for _ in 0..size { + input_raw.push(v); + if v == std::u8::MAX { + v = 0; + } else { + v += 1; + } + } + + // Compress the raw input. + let mut input = Vec::new(); + { + let mut encoder = + DeflateEncoder::new(&mut input, Compression::default()); + encoder.write(&input_raw[..]).unwrap(); + encoder.try_finish().unwrap(); + } + + let mut reader = BufferedReaderDeflate::new( + BufferedReaderGeneric::new(&input[..], None)); + + // Gather some stats to make it easier to figure out whether + // this test is working. + let stats_count = 2 * DEFAULT_BUF_SIZE; + let mut stats = vec![0usize; stats_count]; + + for i in 0..input_raw.len() { + let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec(); + assert!(data.len() > 0); + assert_eq!(data, reader.buffer()); + // And, we may as well check to make sure we read the + // right data. + assert_eq!(data, &input_raw[i..i+data.len()]); + + stats[cmp::min(data.len(), stats_count - 1)] += 1; + + // Consume one byte and see what happens. + reader.consume(1); + } + + if false { + for i in 0..stats.len() { + if stats[i] > 0 { + if i == stats.len() - 1 { + eprint!(">="); + } + eprintln!("{}: {}", i, stats[i]); + } + } + } + } +} diff --git a/buffered-reader/src/generic.rs b/buffered-reader/src/generic.rs index 81469efd..f2f11cd6 100644 --- a/buffered-reader/src/generic.rs +++ b/buffered-reader/src/generic.rs @@ -193,6 +193,14 @@ impl io::Read for BufferedReaderGeneric { } impl BufferedReader for BufferedReaderGeneric { + fn buffer(&self) -> &[u8] { + if let Some(ref buffer) = self.buffer { + &buffer[self.cursor..] + } else { + &b""[..] + } + } + fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { return self.data_helper(amount, false, false); } @@ -260,27 +268,81 @@ impl BufferedReader for BufferedReaderGeneric { } } -#[test] -fn buffered_reader_generic_test() { - // Test reading from a file. - { - use std::path::PathBuf; - use std::fs::File; +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn buffered_reader_generic_test() { + // Test reading from a file. + { + use std::path::PathBuf; + use std::fs::File; - let path : PathBuf = [env!("CARGO_MANIFEST_DIR"), - "src", "buffered-reader-test.txt"] - .iter().collect(); - let mut f = File::open(&path).expect(&path.to_string_lossy()); - let mut bio = BufferedReaderGeneric::new(&mut f, None); + let path : PathBuf = [env!("CARGO_MANIFEST_DIR"), + "src", "buffered-reader-test.txt"] + .iter().collect(); + let mut f = File::open(&path).expect(&path.to_string_lossy()); + let mut bio = BufferedReaderGeneric::new(&mut f, None); - buffered_reader_test_data_check(&mut bio); + buffered_reader_test_data_check(&mut bio); + } + + // Same test, but as a slice. + { + let mut data : &[u8] = include_bytes!("buffered-reader-test.txt"); + let mut bio = BufferedReaderGeneric::new(&mut data, None); + + buffered_reader_test_data_check(&mut bio); + } } - // Same test, but as a slice. - { - let mut data : &[u8] = include_bytes!("buffered-reader-test.txt"); - let mut bio = BufferedReaderGeneric::new(&mut data, None); + // Test that buffer() returns the same data as data(). + #[test] + fn buffer_test() { + // Test vector. + let size = 10 * DEFAULT_BUF_SIZE; + let mut input = Vec::with_capacity(size); + let mut v = 0u8; + for _ in 0..size { + input.push(v); + if v == std::u8::MAX { + v = 0; + } else { + v += 1; + } + } + + let mut reader = BufferedReaderGeneric::new(&input[..], None); + + // Gather some stats to make it easier to figure out whether + // this test is working. + let stats_count = 2 * DEFAULT_BUF_SIZE; + let mut stats = vec![0usize; stats_count]; + + for i in 0..input.len() { + let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec(); + assert!(data.len() > 0); + assert_eq!(data, reader.buffer()); + // And, we may as well check to make sure we read the + // right data. + assert_eq!(data, &input[i..i+data.len()]); + + stats[cmp::min(data.len(), stats_count - 1)] += 1; - buffered_reader_test_data_check(&mut bio); + // Consume one byte and see what happens. + reader.consume(1); + } + + if false { + for i in 0..stats.len() { + if stats[i] > 0 { + if i == stats.len() - 1 { + eprint!(">="); + } + eprintln!("{}: {}", i, stats[i]); + } + } + } } } diff --git a/buffered-reader/src/lib.rs b/buffered-reader/src/lib.rs index 82cab946..aef5c897 100644 --- a/buffered-reader/src/lib.rs +++ b/buffered-reader/src/lib.rs @@ -32,6 +32,12 @@ const DEFAULT_BUF_SIZE: usize = 8 * 1024; /// `BufferedReader` allows the caller to ensure that the internal /// buffer has a certain amount of data. pub trait BufferedReader : io::Read + fmt::Debug { + /// Returns a reference to the internal buffer. + /// + /// Note: this will return the same data as self.data(0), but it + /// does so without mutable borrowing self. + fn buffer(&self) -> &[u8]; + /// Return the data in the internal buffer. Normally, the /// returned buffer will contain *at least* `amount` bytes worth /// of data. Less data may be returned if (and only if) the end @@ -112,8 +118,8 @@ pub trait BufferedReader : io::Read + fmt::Debug { -> Result<&[u8], std::io::Error>; - // This is a convenient function that effectively combines - // data_hard() and consume(). + /// This is a convenient function that effectively combines + /// data_hard() and consume(). fn data_consume_hard(&mut self, amount: usize) -> Result<&[u8], io::Error>; /// A convenience function for reading a 16-bit unsigned integer @@ -132,7 +138,7 @@ pub trait BufferedReader : io::Read + fmt::Debug { } /// Reads and consumes `amount` bytes, and returns them in a - /// caller owned buffer. Implementations may optimize this to + /// caller-owned buffer. Implementations may optimize this to /// avoid a copy. fn steal(&mut self, amount: usize) -> Result, std::io::Error> { let mut data = self.data_consume_hard(amount)?; @@ -230,6 +236,10 @@ pub fn buffered_reader_generic_read_impl, C> /// Make a `Box` look like a BufferedReader. impl <'a, C> BufferedReader for Box + 'a> { + fn buffer(&self) -> &[u8] { + return self.as_ref().buffer(); + } + fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { return self.as_mut().data(amount); } diff --git a/buffered-reader/src/limitor.rs b/buffered-reader/src/limitor.rs index 0bce61f9..ee8dc853 100644 --- a/buffered-reader/src/limitor.rs +++ b/buffered-reader/src/limitor.rs @@ -52,6 +52,13 @@ impl, C> io::Read for BufferedReaderLimitor { } impl, C> BufferedReader for BufferedReaderLimitor { + fn buffer(&self) -> &[u8] { + let buf = self.reader.buffer(); + &buf[..cmp::min(buf.len(), + cmp::min(std::usize::MAX as u64, + self.limit) as usize)] + } + /// Return the buffer. Ensure that it contains at least `amount` /// bytes. fn data(&mut self, amount: usize) -> Result<&[u8], io::Error> { @@ -127,86 +134,143 @@ impl, C> BufferedReader for BufferedReaderLimitor } } -#[test] -fn buffered_reader_limitor_test() { - let data : &[u8] = b"01234567890123456789"; +#[cfg(test)] +mod test { + use super::*; - /* Add a single limitor. */ - { - let mut bio : Box> - = Box::new(BufferedReaderMemory::new(data)); + #[test] + fn buffered_reader_limitor_test() { + let data : &[u8] = b"01234567890123456789"; - bio = { - let mut bio2 = Box::new(BufferedReaderLimitor::new(bio, 5)); - { - let result = bio2.data(5).unwrap(); - assert_eq!(result.len(), 5); - assert_eq!(result, &b"01234"[..]); - } - bio2.consume(5); - { - let result = bio2.data(1).unwrap(); - assert_eq!(result.len(), 0); - assert_eq!(result, &b""[..]); - } + /* Add a single limitor. */ + { + let mut bio : Box> + = Box::new(BufferedReaderMemory::new(data)); - bio2.into_inner().unwrap() - }; + bio = { + let mut bio2 = Box::new(BufferedReaderLimitor::new(bio, 5)); + { + let result = bio2.data(5).unwrap(); + assert_eq!(result.len(), 5); + assert_eq!(result, &b"01234"[..]); + } + bio2.consume(5); + { + let result = bio2.data(1).unwrap(); + assert_eq!(result.len(), 0); + assert_eq!(result, &b""[..]); + } + + bio2.into_inner().unwrap() + }; - { - { - let result = bio.data(15).unwrap(); - assert_eq!(result.len(), 15); - assert_eq!(result, &b"567890123456789"[..]); - } - bio.consume(15); { - let result = bio.data(1).unwrap(); - assert_eq!(result.len(), 0); - assert_eq!(result, &b""[..]); + { + let result = bio.data(15).unwrap(); + assert_eq!(result.len(), 15); + assert_eq!(result, &b"567890123456789"[..]); + } + bio.consume(15); + { + let result = bio.data(1).unwrap(); + assert_eq!(result.len(), 0); + assert_eq!(result, &b""[..]); + } } } - } - /* Try with two limitors where the first one imposes the real - * limit. */ - { - let mut bio : Box> - = Box::new(BufferedReaderMemory::new(data)); + /* Try with two limitors where the first one imposes the real + * limit. */ + { + let mut bio : Box> + = Box::new(BufferedReaderMemory::new(data)); + + bio = { + let bio2 : Box> + = Box::new(BufferedReaderLimitor::new(bio, 5)); + // We limit to 15 bytes, but bio2 will still limit us to 5 + // bytes. + let mut bio3 : Box> + = Box::new(BufferedReaderLimitor::new(bio2, 15)); + { + let result = bio3.data(100).unwrap(); + assert_eq!(result.len(), 5); + assert_eq!(result, &b"01234"[..]); + } + bio3.consume(5); + { + let result = bio3.data(1).unwrap(); + assert_eq!(result.len(), 0); + assert_eq!(result, &b""[..]); + } + + bio3.into_inner().unwrap().into_inner().unwrap() + }; - bio = { - let bio2 : Box> - = Box::new(BufferedReaderLimitor::new(bio, 5)); - // We limit to 15 bytes, but bio2 will still limit us to 5 - // bytes. - let mut bio3 : Box> - = Box::new(BufferedReaderLimitor::new(bio2, 15)); { - let result = bio3.data(100).unwrap(); - assert_eq!(result.len(), 5); - assert_eq!(result, &b"01234"[..]); + { + let result = bio.data(15).unwrap(); + assert_eq!(result.len(), 15); + assert_eq!(result, &b"567890123456789"[..]); + } + bio.consume(15); + { + let result = bio.data(1).unwrap(); + assert_eq!(result.len(), 0); + assert_eq!(result, &b""[..]); + } } - bio3.consume(5); - { - let result = bio3.data(1).unwrap(); - assert_eq!(result.len(), 0); - assert_eq!(result, &b""[..]); + } + } + + // Test that buffer() returns the same data as data(). + #[test] + fn buffer_test() { + // Test vector. + let size = 10 * DEFAULT_BUF_SIZE; + let mut input = Vec::with_capacity(size); + let mut v = 0u8; + for _ in 0..size { + input.push(v); + if v == std::u8::MAX { + v = 0; + } else { + v += 1; } + } - bio3.into_inner().unwrap().into_inner().unwrap() - }; + let reader = BufferedReaderGeneric::new(&input[..], None); + let size = size / 2; + let input = &input[..size]; + let mut reader = BufferedReaderLimitor::new(reader, input.len() as u64); - { - { - let result = bio.data(15).unwrap(); - assert_eq!(result.len(), 15); - assert_eq!(result, &b"567890123456789"[..]); - } - bio.consume(15); - { - let result = bio.data(1).unwrap(); - assert_eq!(result.len(), 0); - assert_eq!(result, &b""[..]); + // Gather some stats to make it easier to figure out whether + // this test is working. + let stats_count = 2 * DEFAULT_BUF_SIZE; + let mut stats = vec![0usize; stats_count]; + + for i in 0..input.len() { + let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec(); + assert!(data.len() > 0); + assert_eq!(data, reader.buffer()); + // And, we may as well check to make sure we read the + // right data. + assert_eq!(data, &input[i..i+data.len()]); + + stats[cmp::min(data.len(), stats_count - 1)] += 1; + + // Consume one byte and see what happens. + reader.consume(1); + } + + if false { + for i in 0..stats.len() { + if stats[i] > 0 { + if i == stats.len() - 1 { + eprint!(">="); + } + eprintln!("{}: {}", i, stats[i]); + } } } } diff --git a/buffered-reader/src/memory.rs b/buffered-reader/src/memory.rs index 5bbc2e13..83de83a8 100644 --- a/buffered-reader/src/memory.rs +++ b/buffered-reader/src/memory.rs @@ -63,6 +63,10 @@ impl<'a, C> io::Read for BufferedReaderMemory<'a, C> { } impl<'a, C> BufferedReader for BufferedReaderMemory<'a, C> { + fn buffer(&self) -> &[u8] { + &self.buffer[self.cursor..] + } + /// Return the buffer. Ensure that it contains at least `amount` /// bytes. fn data(&mut self, _amount: usize) -> Result<&[u8], io::Error> { @@ -120,10 +124,46 @@ impl<'a, C> BufferedReader for BufferedReaderMemory<'a, C> { } } -#[test] -fn buffered_reader_memory_test () { - let data : &[u8] = include_bytes!("buffered-reader-test.txt"); - let mut bio = BufferedReaderMemory::new(data); +#[cfg(test)] +mod test { + use super::*; + #[test] + fn buffered_reader_memory_test () { + let data : &[u8] = include_bytes!("buffered-reader-test.txt"); + let mut bio = BufferedReaderMemory::new(data); + + buffered_reader_test_data_check(&mut bio); + } + + // Test that buffer() returns the same data as data(). + #[test] + fn buffer_test() { + // Test vector. A BufferedReaderMemory returns all unconsumed + // data. So, use a relatively small buffer size. + let size = DEFAULT_BUF_SIZE; + let mut input = Vec::with_capacity(size); + let mut v = 0u8; + for _ in 0..size { + input.push(v); + if v == std::u8::MAX { + v = 0; + } else { + v += 1; + } + } + + let mut reader = BufferedReaderMemory::new(&input[..]); + + for i in 0..input.len() { + let data = reader.data(DEFAULT_BUF_SIZE + 1).unwrap().to_vec(); + assert!(data.len() > 0); + assert_eq!(data, reader.buffer()); + // And, we may as well check to make sure we read the + // right data. + assert_eq!(data, &input[i..i+data.len()]); - buffered_reader_test_data_check(&mut bio); + // Consume one byte and see what happens. + reader.consume(1); + } + } } -- cgit v1.2.3