summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jon Moroney <darakian@gmail.com> 2019-12-18 14:14:17 -0800
committer: Jon Moroney <darakian@gmail.com> 2019-12-22 10:38:54 -1000
commit: 701ef480b2bd2434e0bfe2294d28ece8a7c870ee (patch)
tree: 3e29befbfa87e5586912774ea3b95396c2dc2a2d
parent: 0e31253de383fa9d05f14dc61a22f030753c2a73 (diff)
Basic removal with arbitrary 64KB block choice
-rw-r--r-- src/lib.rs 15
1 files changed, 10 insertions, 5 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 625d23e..e008560 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@
use std::hash::{Hasher};
use std::fs::{self, DirEntry};
-use std::io::{Read, BufReader};
+use std::io::{Read};
use std::path::{PathBuf, Path};
use std::cmp::Ordering;
use serde_derive::{Serialize};
@@ -15,6 +15,8 @@ use std::collections::hash_map::{HashMap, Entry};
use std::io::{Error, ErrorKind};
use nohash_hasher::IntMap;
+const BLOCK_SIZE: usize = 4096;
+
#[derive(PartialEq)]
enum HashMode{
Full,
@@ -77,11 +79,14 @@ impl Fileinfo{
.next()
.expect("Cannot read file path from struct")
) {
- Ok(f) => {
- let mut buffer_reader = BufReader::new(f);
- let mut hash_buffer = [0;4096];
+ Ok(mut f) => {
+ /* We want a read call to be "large" for two reasons
+ 1) Force filesystem read ahead behavior
+ 2) Fewer system calls for a given file.
+ Currently 64KB which is half of a default RHEL read ahead buffer */
+ let mut hash_buffer = [0;BLOCK_SIZE * 16];
loop {
- match buffer_reader.read(&mut hash_buffer) {
+ match f.read(&mut hash_buffer) {
Ok(n) if n>0 => hasher.write(&hash_buffer[0..]),
Ok(n) if n==0 => break,
Err(_e) => {