diff options
author | Justus Winter <justus@sequoia-pgp.org> | 2021-11-04 15:58:03 +0100 |
---|---|---|
committer | Justus Winter <justus@sequoia-pgp.org> | 2021-11-04 16:23:48 +0100 |
commit | 17cd68fc56385a1db2bd0ddbb43cda5bd012e7c1 (patch) | |
tree | 057e17393ee2fd26447247c6818e7da0c595ea0e | |
parent | 62be3957d51efb2191b56c97c732583d02caf1ae (diff) |
openpgp: Use XXH3 to hash packet bodies.
- When we stream packet bodies, we hash their contents so that we
can compare them later on, even if we no longer have the data.
Previously, we used the fastest hash from the SHA2 family, either
SHA256 or SHA512 depending on the architecture.
- That, however, turned out to be a major performance problem. When
decrypting a non-compressed, binary file on amd64, we spent
roughly a third of the time just to compute the hash.
- Using the non-cryptographic hash function XXH3, we can greatly
improve the performance. On my system, it is 30x as fast as SHA2,
and reduces the overhead of computing the body hash considerably:
% time ./sq-sha512 decrypt --recipient-key juliet.key.pgp 3g-for-juliet.binary.pgp >/dev/null 2>&1
13.931 total
% time ./sq-xxh3 decrypt --recipient-key juliet.key.pgp 3g-for-juliet.binary.pgp >/dev/null 2>&1
9.264 total
- See #771.
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | openpgp/Cargo.toml | 1 | ||||
-rw-r--r-- | openpgp/src/packet/container.rs | 41 | ||||
-rw-r--r-- | openpgp/src/parse.rs | 4 |
4 files changed, 25 insertions, 28 deletions
@@ -2875,6 +2875,7 @@ dependencies = [ "win-crypto-ng", "winapi 0.3.9", "x25519-dalek", + "xxhash-rust", ] [[package]] @@ -4007,6 +4008,12 @@ dependencies = [ ] [[package]] +name = "xxhash-rust" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575e15bedf6e57b5c2d763ffc6c3c760143466cbd09d762d539680ab5992ded" + +[[package]] name = "yaml-rust" version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml index 6614a4ef..770cd0d8 100644 --- a/openpgp/Cargo.toml +++ b/openpgp/Cargo.toml @@ -42,6 +42,7 @@ regex = "1" regex-syntax = "0.6" sha1collisiondetection = { version = "0.2.3", default-features = false, features = ["std"] } thiserror = "1.0.2" +xxhash-rust = { version = "0.8", features = ["xxh3"] } backtrace = "0.3.3" # RustCrypto crates. diff --git a/openpgp/src/packet/container.rs b/openpgp/src/packet/container.rs index 15e24a50..41e9ca31 100644 --- a/openpgp/src/packet/container.rs +++ b/openpgp/src/packet/container.rs @@ -8,11 +8,11 @@ use std::hash::{Hash, Hasher}; use std::slice; use std::vec; +use xxhash_rust::xxh3::Xxh3; + use crate::{ Packet, - crypto::hash, packet::Iter, - types::HashAlgorithm, }; /// A packet's body holds either unprocessed bytes, processed bytes, @@ -126,7 +126,7 @@ pub struct Container { body: Body, /// We compute a digest over the body to implement comparison. - body_digest: Vec<u8>, + body_digest: u64, } assert_send_and_sync!(Container); @@ -138,14 +138,6 @@ impl std::ops::Deref for Container { } } -// Pick the fastest hash function from the SHA2 family for the -// architectures word size. On 64-bit architectures, SHA512 is almost -// twice as fast, but on 32-bit ones, SHA256 is faster. 
-#[cfg(target_pointer_width = "64")] -const CONTAINER_BODY_HASH: HashAlgorithm = HashAlgorithm::SHA512; -#[cfg(not(target_pointer_width = "64"))] -const CONTAINER_BODY_HASH: HashAlgorithm = HashAlgorithm::SHA256; - impl PartialEq for Container { fn eq(&self, other: &Container) -> bool { use Body::*; @@ -177,7 +169,7 @@ impl Default for Container { fn default() -> Self { Self { body: Body::Structured(Vec::with_capacity(0)), - body_digest: Vec::with_capacity(0), + body_digest: 0, } } } @@ -186,7 +178,7 @@ impl From<Vec<Packet>> for Container { fn from(packets: Vec<Packet>) -> Self { Self { body: Body::Structured(packets), - body_digest: Vec::with_capacity(0), + body_digest: 0, } } } @@ -304,36 +296,31 @@ impl Container { } /// Returns the hash for the empty body. - fn empty_body_digest() -> Vec<u8> { + fn empty_body_digest() -> u64 { lazy_static::lazy_static!{ - static ref DIGEST: Vec<u8> = { - let mut h = Container::make_body_hash(); - let mut d = vec![0; h.digest_size()]; - let _ = h.digest(&mut d); - d + static ref DIGEST: u64 = { + Container::make_body_hash().digest() }; } - DIGEST.clone() + *DIGEST } /// Creates a hash context for hashing the body. pub(crate) // For parse.rs - fn make_body_hash() -> Box<dyn hash::Digest> { - CONTAINER_BODY_HASH.context() - .expect("CONTAINER_BODY_HASH must be implemented") + fn make_body_hash() -> Box<Xxh3> { + Box::new(Xxh3::new()) } /// Hashes content that has been streamed. pub(crate) // For parse.rs - fn set_body_hash(&mut self, mut h: Box<dyn hash::Digest>) { - self.body_digest.resize(h.digest_size(), 0); - let _ = h.digest(&mut self.body_digest); + fn set_body_hash(&mut self, h: Box<Xxh3>) { + self.body_digest = h.digest(); } pub(crate) fn body_digest(&self) -> String { - crate::fmt::hex::encode(&self.body_digest) + format!("{:08X}", self.body_digest) } // Converts an indentation level to whitespace. 
diff --git a/openpgp/src/parse.rs b/openpgp/src/parse.rs index 55489be7..326793fb 100644 --- a/openpgp/src/parse.rs +++ b/openpgp/src/parse.rs @@ -183,6 +183,8 @@ use std::fmt; use std::path::Path; use std::result::Result as StdResult; +use xxhash_rust::xxh3::Xxh3; + use ::buffered_reader::*; use crate::{ @@ -3309,7 +3311,7 @@ pub struct PacketParser<'a> { /// We compute a hashsum over the body to implement comparison on /// containers that have been streamed. - body_hash: Option<Box<dyn crate::crypto::hash::Digest>>, + body_hash: Option<Box<Xxh3>>, state: PacketParserState, } |