diff options
author | Justus Winter <justus@sequoia-pgp.org> | 2021-04-08 14:28:18 +0200 |
---|---|---|
committer | Justus Winter <justus@sequoia-pgp.org> | 2021-04-08 14:28:18 +0200 |
commit | c9695ed5a1b9f1e8f60e8d361dda7e84ad7dcbed (patch) | |
tree | da94d1fe31cf18c800c58a202f2ac682014f6e70 /openpgp | |
parent | ce74b3c668b16dd6bdb3e2c2450c17dbbd9dc815 (diff) |
WIP: openpgp: Save parsed header, use it to perfectly roundtrip. (branch: justus/fix-238)
- Outstanding issues:
- Packet::gross_len must use stashed packet header
- or just use max(|stashed packet header|, |header|)???
- what about equality
- Fixes #238.
Diffstat (limited to 'openpgp')
-rw-r--r-- | openpgp/src/packet/mod.rs | 26 | ||||
-rw-r--r-- | openpgp/src/parse.rs | 5 | ||||
-rw-r--r-- | openpgp/src/serialize.rs | 255 |
3 files changed, 243 insertions, 43 deletions
diff --git a/openpgp/src/packet/mod.rs b/openpgp/src/packet/mod.rs index e23445df..fdfb14af 100644 --- a/openpgp/src/packet/mod.rs +++ b/openpgp/src/packet/mod.rs @@ -520,6 +520,11 @@ impl Arbitrary for Packet { } /// Fields used by multiple packet types. +/// +/// Parsed packets retain their header. This allows perfect +/// reproduction of the serialized form. If a packet is constructed +/// using Sequoia, or the parsed header is cleared, a packet header is +/// generated. #[derive(Debug, Clone)] pub struct Common { // In the future, this structure will hold the parsed CTB, packet @@ -532,9 +537,7 @@ pub struct Common { // derive PartialEq, Eq, PartialOrd, Ord, and Hash for most // packets. - /// XXX: Prevents trivial matching on this structure. Remove once - /// this structure actually gains some fields. - dummy: std::marker::PhantomData<()>, + header: Vec<u8>, } assert_send_and_sync!(Common); @@ -549,7 +552,7 @@ impl Arbitrary for Common { impl Default for Common { fn default() -> Common { Common { - dummy: Default::default(), + header: Vec::with_capacity(0), } } } @@ -581,6 +584,21 @@ impl std::hash::Hash for Common { } } +impl Common { + /// Returns a reference to the parsed packet header, if any. + pub fn parsed_header(&self) -> Option<&[u8]> { + if self.header.is_empty() { + None + } else { + Some(&self.header) + } + } + + /// Returns a mutable reference to the parsed packet header. + pub fn parsed_header_mut(&mut self) -> &mut Vec<u8> { + &mut self.header + } +} /// An iterator over the *contents* of a packet in depth-first order. /// diff --git a/openpgp/src/parse.rs b/openpgp/src/parse.rs index c077e81d..08b561b6 100644 --- a/openpgp/src/parse.rs +++ b/openpgp/src/parse.rs @@ -481,7 +481,7 @@ impl<'a, T: 'a + BufferedReader<Cookie>> PacketHeaderParser<T> { // Only call this function if the packet's header has been // completely and correctly parsed. If a failure occurs while // parsing the header, use `fail()` instead. 
- fn ok(mut self, packet: Packet) -> Result<PacketParser<'a>> { + fn ok(mut self, mut packet: Packet) -> Result<PacketParser<'a>> { let total_out = self.reader.total_out(); let mut reader = if self.state.settings.map { @@ -519,6 +519,9 @@ impl<'a, T: 'a + BufferedReader<Cookie>> PacketHeaderParser<T> { reader.data_consume_hard(total_out).unwrap(); } + // Stash the parsed header. + std::mem::swap(packet.parsed_header_mut(), &mut self.header_bytes); + Ok(PacketParser { header: self.header, packet, diff --git a/openpgp/src/serialize.rs b/openpgp/src/serialize.rs index 09a84e61..76851538 100644 --- a/openpgp/src/serialize.rs +++ b/openpgp/src/serialize.rs @@ -2554,6 +2554,81 @@ impl MarshalInto for AED1 { } } +impl Packet { + /// XXX We cannot do that if we have children, chunking... + fn may_reuse_parsed_header(&self, net_len: usize) -> bool { + may_reuse_parsed_header(net_len, self.parsed_header(), + self.container_ref()) + } +} + +impl PacketRef<'_> { + /// XXX We cannot do that if we have children, chunking... + fn may_reuse_parsed_header(&self, net_len: usize) -> bool { + may_reuse_parsed_header(net_len, self.parsed_header(), + self.container_ref()) + } +} + +/// XXX We cannot do that if we have children, chunking... 
+fn may_reuse_parsed_header(net_len: usize, + parsed_header: Option<&[u8]>, + container: Option<&Container>) -> bool { + tracer!(TRACE, "may_reuse_parsed_header", 0); + + let header_bytes = if let Some(h) = parsed_header { + h + } else { + t!("No parsed header"); + return false; + }; + + use crate::parse::Parse; + let header = match Header::from_bytes(header_bytes) { + Ok(h) => h, + Err(e) => { + t!("Header failed to parse: {}", e); + return false; + }, + }; + + use packet::header::BodyLength::*; + let expected_net_len = match header.length() { + Full(l) => *l as usize, + Partial(_) => { + // XXX: we could implement that if we wanted + t!("Partial body encoding, but we won't emit that"); + return false; + }, + Indeterminate => { + // Doing this safely requires more context, because we + // can only emit a packet of indeterminate length if + // we know that it is the last packet in the current + // container. + t!("Indeterminate length, we won't emit that"); + return false; + }, + }; + + if let Some(c) = container { + match c.body() { + crate::packet::Body::Unprocessed(_) => (), + _ => { + t!("Packet body has been processed"); + return false; + }, + } + } + + if net_len != expected_net_len { + t!("Packet size has changed"); + return false; + } + + t!("All checked out, reusing header"); + true +} + impl Serialize for Packet {} impl seal::Sealed for Packet {} impl Marshal for Packet { @@ -2562,20 +2637,46 @@ impl Marshal for Packet { /// This function works recursively: if the packet contains any /// packets, they are also serialized. fn serialize(&self, o: &mut dyn std::io::Write) -> Result<()> { - CTB::new(self.tag()).serialize(o)?; - // Special-case the compressed data packet, because we need // the accurate length, and CompressedData::net_len() // overestimates the size. 
if let Packet::CompressedData(ref p) = self { - let mut body = Vec::new(); - p.serialize(&mut body)?; - BodyLength::Full(body.len() as u32).serialize(o)?; - o.write_all(&body)?; - return Ok(()); + match p.body() { + crate::packet::Body::Unprocessed(_) => { + // Unprocessed body will be written out as is, + // therefore the NetLength implementation is + // accurate. + }, + _ => { + // The packet body has been processed, therefore + // we need to re-compress the body. In this case, + // we cannot predict the size of the resulting + // packet, therefore we need to buffer the + // compressed data first. + + // First, compress the body. + let mut body = Vec::new(); + p.serialize(&mut body)?; + + // Then, write the packet to `o`. + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(body.len() as u32).serialize(o)?; + o.write_all(&body)?; + return Ok(()); + }, + } + } + + // Avoid computing the length twice. + let net_len = self.net_len(); + + if self.may_reuse_parsed_header(net_len) { + o.write_all(&self.parsed_header().expect("checked above"))?; + } else { + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(net_len as u32).serialize(o)?; } - BodyLength::Full(self.net_len() as u32).serialize(o)?; match self { &Packet::Unknown(ref p) => p.serialize(o), &Packet::Signature(ref p) => p.serialize(o), @@ -2589,7 +2690,7 @@ impl Marshal for Packet { &Packet::UserID(ref p) => p.serialize(o), &Packet::UserAttribute(ref p) => p.serialize(o), &Packet::Literal(ref p) => p.serialize(o), - &Packet::CompressedData(_) => unreachable!("handled above"), + &Packet::CompressedData(ref p) => p.serialize(o), &Packet::PKESK(ref p) => p.serialize(o), &Packet::SKESK(ref p) => p.serialize(o), &Packet::SEIP(ref p) => p.serialize(o), @@ -2603,20 +2704,46 @@ impl Marshal for Packet { /// This function works recursively: if the packet contains any /// packets, they are also serialized. 
fn export(&self, o: &mut dyn std::io::Write) -> Result<()> { - CTB::new(self.tag()).serialize(o)?; - // Special-case the compressed data packet, because we need // the accurate length, and CompressedData::net_len() // overestimates the size. if let Packet::CompressedData(ref p) = self { - let mut body = Vec::new(); - p.export(&mut body)?; - BodyLength::Full(body.len() as u32).export(o)?; - o.write_all(&body)?; - return Ok(()); + match p.body() { + crate::packet::Body::Unprocessed(_) => { + // Unprocessed body will be written out as is, + // therefore the NetLength implementation is + // accurate. + }, + _ => { + // The packet body has been processed, therefore + // we need to re-compress the body. In this case, + // we cannot predict the size of the resulting + // packet, therefore we need to buffer the + // compressed data first. + + // First, compress the body. + let mut body = Vec::new(); + p.export(&mut body)?; + + // Then, write the packet to `o`. + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(body.len() as u32).serialize(o)?; + o.write_all(&body)?; + return Ok(()); + }, + } + } + + // Avoid computing the length twice. 
+ let net_len = self.net_len(); + + if self.may_reuse_parsed_header(net_len) { + o.write_all(&self.parsed_header().expect("checked above"))?; + } else { + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(net_len as u32).serialize(o)?; } - BodyLength::Full(self.net_len() as u32).export(o)?; match self { &Packet::Unknown(ref p) => p.export(o), &Packet::Signature(ref p) => p.export(o), @@ -2630,7 +2757,7 @@ impl Marshal for Packet { &Packet::UserID(ref p) => p.export(o), &Packet::UserAttribute(ref p) => p.export(o), &Packet::Literal(ref p) => p.export(o), - &Packet::CompressedData(_) => unreachable!("handled above"), + &Packet::CompressedData(ref p) => p.export(o), &Packet::PKESK(ref p) => p.export(o), &Packet::SKESK(ref p) => p.export(o), &Packet::SEIP(ref p) => p.export(o), @@ -2811,20 +2938,46 @@ impl<'a> Marshal for PacketRef<'a> { /// This function works recursively: if the packet contains any /// packets, they are also serialized. fn serialize(&self, o: &mut dyn std::io::Write) -> Result<()> { - CTB::new(self.tag()).serialize(o)?; - // Special-case the compressed data packet, because we need // the accurate length, and CompressedData::net_len() // overestimates the size. - if let PacketRef::CompressedData(ref p) = self { - let mut body = Vec::new(); - p.serialize(&mut body)?; - BodyLength::Full(body.len() as u32).serialize(o)?; - o.write_all(&body)?; - return Ok(()); + if let PacketRef::CompressedData(p) = self { + match p.body() { + crate::packet::Body::Unprocessed(_) => { + // Unprocessed body will be written out as is, + // therefore the NetLength implementation is + // accurate. + }, + _ => { + // The packet body has been processed, therefore + // we need to re-compress the body. In this case, + // we cannot predict the size of the resulting + // packet, therefore we need to buffer the + // compressed data first. + + // First, compress the body. + let mut body = Vec::new(); + p.serialize(&mut body)?; + + // Then, write the packet to `o`. 
+ CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(body.len() as u32).serialize(o)?; + o.write_all(&body)?; + return Ok(()); + }, + } + } + + // Avoid computing the length twice. + let net_len = self.net_len(); + + if self.may_reuse_parsed_header(net_len) { + o.write_all(&self.parsed_header().expect("checked above"))?; + } else { + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(net_len as u32).serialize(o)?; } - BodyLength::Full(self.net_len() as u32).serialize(o)?; match self { PacketRef::Unknown(p) => p.serialize(o), PacketRef::Signature(p) => p.serialize(o), @@ -2838,7 +2991,7 @@ impl<'a> Marshal for PacketRef<'a> { PacketRef::UserID(p) => p.serialize(o), PacketRef::UserAttribute(p) => p.serialize(o), PacketRef::Literal(p) => p.serialize(o), - PacketRef::CompressedData(_) => unreachable!("handled above"), + PacketRef::CompressedData(p) => p.serialize(o), PacketRef::PKESK(p) => p.serialize(o), PacketRef::SKESK(p) => p.serialize(o), PacketRef::SEIP(p) => p.serialize(o), @@ -2852,20 +3005,46 @@ impl<'a> Marshal for PacketRef<'a> { /// This function works recursively: if the packet contains any /// packets, they are also serialized. fn export(&self, o: &mut dyn std::io::Write) -> Result<()> { - CTB::new(self.tag()).serialize(o)?; - // Special-case the compressed data packet, because we need // the accurate length, and CompressedData::net_len() // overestimates the size. - if let PacketRef::CompressedData(ref p) = self { - let mut body = Vec::new(); - p.export(&mut body)?; - BodyLength::Full(body.len() as u32).export(o)?; - o.write_all(&body)?; - return Ok(()); + if let PacketRef::CompressedData(p) = self { + match p.body() { + crate::packet::Body::Unprocessed(_) => { + // Unprocessed body will be written out as is, + // therefore the NetLength implementation is + // accurate. + }, + _ => { + // The packet body has been processed, therefore + // we need to re-compress the body. 
In this case, + // we cannot predict the size of the resulting + // packet, therefore we need to buffer the + // compressed data first. + + // First, compress the body. + let mut body = Vec::new(); + p.export(&mut body)?; + + // Then, write the packet to `o`. + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(body.len() as u32).serialize(o)?; + o.write_all(&body)?; + return Ok(()); + }, + } + } + + // Avoid computing the length twice. + let net_len = self.net_len(); + + if self.may_reuse_parsed_header(net_len) { + o.write_all(&self.parsed_header().expect("checked above"))?; + } else { + CTB::new(self.tag()).serialize(o)?; + BodyLength::Full(net_len as u32).serialize(o)?; } - BodyLength::Full(self.net_len() as u32).export(o)?; match self { PacketRef::Unknown(p) => p.export(o), PacketRef::Signature(p) => p.export(o), @@ -2879,7 +3058,7 @@ impl<'a> Marshal for PacketRef<'a> { PacketRef::UserID(p) => p.export(o), PacketRef::UserAttribute(p) => p.export(o), PacketRef::Literal(p) => p.export(o), - PacketRef::CompressedData(_) => unreachable!("handled above"), + PacketRef::CompressedData(p) => p.export(o), PacketRef::PKESK(p) => p.export(o), PacketRef::SKESK(p) => p.export(o), PacketRef::SEIP(p) => p.export(o), |