summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJustus Winter <justus@sequoia-pgp.org>2021-11-04 15:58:03 +0100
committerJustus Winter <justus@sequoia-pgp.org>2021-11-04 16:23:48 +0100
commit17cd68fc56385a1db2bd0ddbb43cda5bd012e7c1 (patch)
tree057e17393ee2fd26447247c6818e7da0c595ea0e
parent62be3957d51efb2191b56c97c732583d02caf1ae (diff)
openpgp: Use XXH3 to hash packet bodies.
- When we stream packet bodies, we hash their contents so that we can compare them later on, even if we no longer have the data. Previously, we used the fastest hash from the SHA2 family, either SHA256 or SHA512 depending on the architecture.
- That, however, turned out to be a major performance problem. When decrypting a non-compressed, binary file on amd64, we spent roughly a third of the time just to compute the hash.
- Using the non-cryptographic hash function XXH3, we can greatly improve the performance. On my system, it is 30x as fast as SHA3, and reduces the overhead of computing the body hash considerably:
  % time ./sq-sha512 decrypt --recipient-key juliet.key.pgp 3g-for-juliet.binary.pgp >/dev/null 2>&1
  13.931 total
  % time ./sq-xxh3 decrypt --recipient-key juliet.key.pgp 3g-for-juliet.binary.pgp >/dev/null 2>&1
  9.264 total
- See #771.
-rw-r--r--Cargo.lock7
-rw-r--r--openpgp/Cargo.toml1
-rw-r--r--openpgp/src/packet/container.rs41
-rw-r--r--openpgp/src/parse.rs4
4 files changed, 25 insertions, 28 deletions
diff --git a/Cargo.lock b/Cargo.lock
index d08a8d3f..3c66ff3f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2875,6 +2875,7 @@ dependencies = [
"win-crypto-ng",
"winapi 0.3.9",
"x25519-dalek",
+ "xxhash-rust",
]
[[package]]
@@ -4007,6 +4008,12 @@ dependencies = [
]
[[package]]
+name = "xxhash-rust"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e575e15bedf6e57b5c2d763ffc6c3c760143466cbd09d762d539680ab5992ded"
+
+[[package]]
name = "yaml-rust"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml
index 6614a4ef..770cd0d8 100644
--- a/openpgp/Cargo.toml
+++ b/openpgp/Cargo.toml
@@ -42,6 +42,7 @@ regex = "1"
regex-syntax = "0.6"
sha1collisiondetection = { version = "0.2.3", default-features = false, features = ["std"] }
thiserror = "1.0.2"
+xxhash-rust = { version = "0.8", features = ["xxh3"] }
backtrace = "0.3.3"
# RustCrypto crates.
diff --git a/openpgp/src/packet/container.rs b/openpgp/src/packet/container.rs
index 15e24a50..41e9ca31 100644
--- a/openpgp/src/packet/container.rs
+++ b/openpgp/src/packet/container.rs
@@ -8,11 +8,11 @@ use std::hash::{Hash, Hasher};
use std::slice;
use std::vec;
+use xxhash_rust::xxh3::Xxh3;
+
use crate::{
Packet,
- crypto::hash,
packet::Iter,
- types::HashAlgorithm,
};
/// A packet's body holds either unprocessed bytes, processed bytes,
@@ -126,7 +126,7 @@ pub struct Container {
body: Body,
/// We compute a digest over the body to implement comparison.
- body_digest: Vec<u8>,
+ body_digest: u64,
}
assert_send_and_sync!(Container);
@@ -138,14 +138,6 @@ impl std::ops::Deref for Container {
}
}
-// Pick the fastest hash function from the SHA2 family for the
-// architectures word size. On 64-bit architectures, SHA512 is almost
-// twice as fast, but on 32-bit ones, SHA256 is faster.
-#[cfg(target_pointer_width = "64")]
-const CONTAINER_BODY_HASH: HashAlgorithm = HashAlgorithm::SHA512;
-#[cfg(not(target_pointer_width = "64"))]
-const CONTAINER_BODY_HASH: HashAlgorithm = HashAlgorithm::SHA256;
-
impl PartialEq for Container {
fn eq(&self, other: &Container) -> bool {
use Body::*;
@@ -177,7 +169,7 @@ impl Default for Container {
fn default() -> Self {
Self {
body: Body::Structured(Vec::with_capacity(0)),
- body_digest: Vec::with_capacity(0),
+ body_digest: 0,
}
}
}
@@ -186,7 +178,7 @@ impl From<Vec<Packet>> for Container {
fn from(packets: Vec<Packet>) -> Self {
Self {
body: Body::Structured(packets),
- body_digest: Vec::with_capacity(0),
+ body_digest: 0,
}
}
}
@@ -304,36 +296,31 @@ impl Container {
}
/// Returns the hash for the empty body.
- fn empty_body_digest() -> Vec<u8> {
+ fn empty_body_digest() -> u64 {
lazy_static::lazy_static!{
- static ref DIGEST: Vec<u8> = {
- let mut h = Container::make_body_hash();
- let mut d = vec![0; h.digest_size()];
- let _ = h.digest(&mut d);
- d
+ static ref DIGEST: u64 = {
+ Container::make_body_hash().digest()
};
}
- DIGEST.clone()
+ *DIGEST
}
/// Creates a hash context for hashing the body.
pub(crate) // For parse.rs
- fn make_body_hash() -> Box<dyn hash::Digest> {
- CONTAINER_BODY_HASH.context()
- .expect("CONTAINER_BODY_HASH must be implemented")
+ fn make_body_hash() -> Box<Xxh3> {
+ Box::new(Xxh3::new())
}
/// Hashes content that has been streamed.
pub(crate) // For parse.rs
- fn set_body_hash(&mut self, mut h: Box<dyn hash::Digest>) {
- self.body_digest.resize(h.digest_size(), 0);
- let _ = h.digest(&mut self.body_digest);
+ fn set_body_hash(&mut self, h: Box<Xxh3>) {
+ self.body_digest = h.digest();
}
pub(crate)
fn body_digest(&self) -> String {
- crate::fmt::hex::encode(&self.body_digest)
+ format!("{:08X}", self.body_digest)
}
// Converts an indentation level to whitespace.
diff --git a/openpgp/src/parse.rs b/openpgp/src/parse.rs
index 55489be7..326793fb 100644
--- a/openpgp/src/parse.rs
+++ b/openpgp/src/parse.rs
@@ -183,6 +183,8 @@ use std::fmt;
use std::path::Path;
use std::result::Result as StdResult;
+use xxhash_rust::xxh3::Xxh3;
+
use ::buffered_reader::*;
use crate::{
@@ -3309,7 +3311,7 @@ pub struct PacketParser<'a> {
/// We compute a hashsum over the body to implement comparison on
/// containers that have been streamed.
- body_hash: Option<Box<dyn crate::crypto::hash::Digest>>,
+ body_hash: Option<Box<Xxh3>>,
state: PacketParserState,
}