From 18b256e040881ac674463913b2a7e290125ea738 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Tue, 12 Jan 2021 10:16:20 +0100 Subject: Reimplement hash verification This patch re-implements hashing using the "ring" crypto library and implements streaming hashing with it. This way, we stream the file to the hasher rather than reading the full file into memory. Signed-off-by: Matthias Beyer --- Cargo.toml | 4 ++-- src/package/source.rs | 45 +++++++++++++++++++++------------------------ src/source/mod.rs | 12 ++++-------- 3 files changed, 27 insertions(+), 34 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8f3c7c5..6217efd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ clap_generate = "3.0.0-beta.2" colored = "2" config = "0.10" csv = "1.1" +data-encoding = "2.3" diesel = { version = "1.4", features = ["postgres", "chrono", "uuid", "serde_json"] } env_logger = "0.8" filters = "0.4.0" @@ -34,10 +35,9 @@ regex = "1" reqwest = { version = "0.10", features = [ "stream" ] } resiter = "0.4" result-inspect = "0.1" +ring = "0.16" serde = "1" serde_json = "1" -sha1 = { version = "0.6", features = ["std"] } -sha2 = "0.9" shiplift = { git = "https://github.com/softprops/shiplift", rev = "03cc8c075f86f1bd9e2c4e29872a0e8b9072c7f0" } syntect = "4.4" tar = "0.4" diff --git a/src/package/source.rs b/src/package/source.rs index 4d97365..4f8994b 100644 --- a/src/package/source.rs +++ b/src/package/source.rs @@ -1,3 +1,5 @@ +use std::io::Read; + use anyhow::Result; use anyhow::anyhow; use getset::Getters; @@ -33,9 +35,9 @@ pub struct SourceHash { } impl SourceHash { - pub fn matches_hash_of(&self, buf: &[u8]) -> Result<()> { + pub fn matches_hash_of<R: Read>(&self, reader: R) -> Result<()> { trace!("Hashing buffer with: {:?}", self.hashtype); - let h = self.hashtype.hash_buffer(&buf)?; + let h = self.hashtype.hash_from_reader(reader)?; trace!("Hashing buffer with: {} finished", self.hashtype); if h == self.value { @@ -70,29 +72,24 @@ pub enum HashType { } impl HashType 
{ - fn hash_buffer(&self, buffer: &[u8]) -> Result<HashValue> { - match self { - HashType::Sha1 => { - trace!("SHA1 hashing buffer"); - let mut m = sha1::Sha1::new(); - m.update(buffer); - Ok(HashValue(m.digest().to_string())) - }, - HashType::Sha256 => { - trace!("SHA256 hashing buffer"); - //let mut m = sha2::Sha256::new(); - //m.update(buffer); - //Ok(HashValue(String::from(m.finalize()))) - unimplemented!() - }, - HashType::Sha512 => { - trace!("SHA512 hashing buffer"); - //let mut m = sha2::Sha512::new(); - //m.update(buffer); - //Ok(HashValue(String::from(m.finalize()))) - unimplemented!() - }, + fn hash_from_reader<R: Read>(&self, mut reader: R) -> Result<HashValue> { + use ring::digest::{Context, SHA1_FOR_LEGACY_USE_ONLY, SHA256, SHA512}; + let mut context = match self { + HashType::Sha1 => Context::new(&SHA1_FOR_LEGACY_USE_ONLY), + HashType::Sha256 => Context::new(&SHA256), + HashType::Sha512 => Context::new(&SHA512), + }; + let mut buffer = [0; 1024]; + + loop { + let count = reader.read(&mut buffer)?; + if count == 0 { + break; + } + context.update(&buffer[..count]); } + + Ok(HashValue(data_encoding::HEXLOWER.encode(context.finish().as_ref()))) } } diff --git a/src/source/mod.rs b/src/source/mod.rs index 20671bc..734b89e 100644 --- a/src/source/mod.rs +++ b/src/source/mod.rs @@ -86,24 +86,20 @@ impl SourceEntry { trace!("Reading to buffer: {}", p.display()); let path = p.clone(); - let buf = tokio::task::spawn_blocking(move || { - use std::io::Read; - - let mut buf = vec![]; + let reader = tokio::task::spawn_blocking(move || { std::fs::OpenOptions::new() .create(false) .create_new(false) .read(true) - .open(path)? 
- .read_to_end(&mut buf) - .map(|_| buf) + .open(path) + .map(std::io::BufReader::new) }) .await??; trace!("Reading to buffer finished: {}", p.display()); self.package_source .hash() - .matches_hash_of(&buf) + .matches_hash_of(reader) } pub async fn create(&self) -> Result { -- cgit v1.2.3 From c3fc1281142ec10414197a31070cc45930a859e3 Mon Sep 17 00:00:00 2001 From: Matthias Beyer Date: Tue, 12 Jan 2021 10:35:01 +0100 Subject: Move hashing itself into blocking task This patch, as a followup to 18b256e040881ac674463913b2a7e290125ea738 ("Reimplement hash verification") moves the hashing itself into the blocking closure, so that we get maximum parallelism here. Signed-off-by: Matthias Beyer --- src/source/mod.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/src/source/mod.rs b/src/source/mod.rs index 734b89e..ebc60dd 100644 --- a/src/source/mod.rs +++ b/src/source/mod.rs @@ -81,25 +81,29 @@ impl SourceEntry { } pub async fn verify_hash(&self) -> Result<()> { - let p = self.source_file_path(); - trace!("Reading to buffer: {}", p.display()); + trace!("Reading: {}", p.display()); + + // we can clone() here, because the object itself is just a representation of "what hash + // type do we use here", which is rather cheap to clone (because it is + // crate::package::SourceHash, that is not more than an enum + String). + // + // We need to clone to move into the closure below. + let source_hash = self.package_source.hash().clone(); - let path = p.clone(); - let reader = tokio::task::spawn_blocking(move || { + tokio::task::spawn_blocking(move || { std::fs::OpenOptions::new() .create(false) .create_new(false) .read(true) - .open(path) + .open(&p) + .map_err(Error::from) .map(std::io::BufReader::new) + .and_then(|reader| { + source_hash.matches_hash_of(reader) + }) }) - .await??; - - trace!("Reading to buffer finished: {}", p.display()); - self.package_source - .hash() - .matches_hash_of(reader) + .await? 
} pub async fn create(&self) -> Result { -- cgit v1.2.3