diff options
author | Neal H. Walfield <neal@pep.foundation> | 2020-03-26 16:28:10 +0100 |
---|---|---|
committer | Neal H. Walfield <neal@pep.foundation> | 2020-03-26 16:28:10 +0100 |
commit | 59be7cc83ea359f4d11f9e492f3d132a465fee72 (patch) | |
tree | 10893944ec31d95916f056597e502e5484e66f29 /openpgp/src/parse.rs | |
parent | 09fdfda9be3fbc9ec9264c3a1420520ef48e2eb4 (diff) |
openpgp: Merge parse/mod.rs and parse/parse.rs into parse.rs.
- Remove the 'include!' hack.
- In Rust 2018, 'foo/mod.rs' can also be called 'foo.rs'. This
solves the problem that the hack was trying to work around.
Diffstat (limited to 'openpgp/src/parse.rs')
-rw-r--r-- | openpgp/src/parse.rs | 4765 |
1 files changed, 4765 insertions, 0 deletions
diff --git a/openpgp/src/parse.rs b/openpgp/src/parse.rs new file mode 100644 index 00000000..89bcabe4 --- /dev/null +++ b/openpgp/src/parse.rs @@ -0,0 +1,4765 @@ +//! OpenPGP packet parser. +//! +//! An OpenPGP message is a sequence of packets. Some of the packets +//! contain other packets. These containers include encrypted packets +//! (the SED and SEIP packets), and compressed packets. This +//! structure results in a tree, which is laid out in depth-first +//! order. +//! +//! There are two major concerns that inform the design of the parsing +//! API. +//! +//! First, when processing a container, it is possible to either +//! recurse into the container, and process its children, or treat the +//! contents of the container as an opaque byte stream, and process +//! the packet following the container. The low-level +//! [`PacketParser`] and mid-level [`PacketPileParser`] abstractions +//! allow the caller to choose the behavior by either calling the +//! `recurse()` method or the `next()` method, as appropriate. +//! OpenPGP doesn't impose any restrictions on the amount of nesting. +//! So, to prevent a denial of service attack, the parsers doesn't +//! recurse more than `MAX_RECURSION_DEPTH` times, by default. +//! +//! Second, packets can contain an effectively unbounded amount of +//! data. To avoid errors due to memory exhaustion, the +//! [`PacketParser`] and [`PacketPileParser`] abstractions support +//! parsing packets in a streaming manner, i.e., never buffering more +//! than O(1) bytes of data. To do this, the parsers initially only +//! parse a packet's header (which is rarely more than a few kilobytes +//! of data), and return control to the caller. After inspecting that +//! data, the caller can decide how to handle the packet's contents. +//! If the content is deemed interesting, it can be streamed or +//! buffered. Otherwise, it can be dropped. Streaming is possible +//! not only for literal data packets, but also containers (other +//! 
packets also support the interface, but just return EOF). For +//! instance, encryption can be stripped by saving the decrypted +//! content of an encryption packet, which is just an OpenPGP message. +//! +//! We explicitly chose to not use a callback-based API, but something +//! that is closer to Rust's iterator API. Unfortunately, because a +//! [`PacketParser`] needs mutable access to the input stream (so that +//! the content can be streamed), only a single [`PacketParser`] item +//! can be live at a time (without a fair amount of unsafe nastiness). +//! This is incompatible with Rust's iterator concept, which allows +//! any number of items to be live at any time. For instance: +//! +//! ```rust +//! let mut v = vec![1, 2, 3, 4]; +//! let mut iter = v.iter_mut(); +//! +//! let x = iter.next().unwrap(); +//! let y = iter.next().unwrap(); +//! +//! *x += 10; // This does not cause an error! +//! *y += 10; +//! ``` +//! +//! This crate provide three abstractions for parsing OpenPGP +//! messages: +//! +//! - The [`PacketParser`] abstraction produces one packet at a +//! time. What is done with those packets is completely up to the +//! caller. +//! +//! - The [`PacketPileParser`] abstraction builds on the +//! [`PacketParser`] abstraction and provides a similar interface. +//! However, after each iteration, the `PacketPileParser` adds the +//! packet to a [`PacketPile`], which is returned once the packets are +//! completely processed. +//! +//! This interface should only be used if the caller actually +//! wants a `PacketPile`; if the OpenPGP message is parsed in place, +//! then using a `PacketParser` is better. +//! +//! - The [`PacketPile::from_file`] (and related methods) is the most +//! convenient, but least flexible way to parse a sequence of OpenPGP +//! packets. Whereas a `PacketPileParser` allows the caller to +//! determine how to handle individual packets, the +//! [`PacketPile::from_file`] parses the whole message at once and +//! 
returns a [`PacketPile`]. +//! +//! This interface should only be used if the caller is certain +//! that the parsed message will fit in memory. +//! +//! In all cases, the default behavior can be configured using a +//! [`PacketParserBuilder`]. +//! +//! [`PacketParser`]: struct.PacketParser.html +//! [`PacketPileParser`]: struct.PacketPileParser.html +//! [`PacketPile`]: ../struct.PacketPile.html +//! [`PacketPile::from_file`]: ../struct.PacketPile.html#method.from_file +//! [`PacketParserBuilder`]: struct.PacketParserBuilder.html +use std; +use std::io; +use std::io::prelude::*; +use std::cmp; +use std::str; +use std::mem; +use std::fmt; +use std::path::Path; + +use ::buffered_reader::*; + +use crate::{ + cert::prelude::*, + crypto::{aead, hash::Hash}, + Result, + packet::header::{ + CTB, + BodyLength, + PacketLengthType, + }, + crypto::S2K, + Error, + packet::{ + Container, + Header, + }, + packet::signature::Signature4, + packet::prelude::*, + Packet, + Fingerprint, + KeyID, + crypto::SessionKey, +}; +use crate::types::{ + AEADAlgorithm, + CompressionAlgorithm, + Features, + HashAlgorithm, + KeyFlags, + KeyServerPreferences, + PublicKeyAlgorithm, + RevocationKey, + SignatureType, + SymmetricAlgorithm, + Timestamp, +}; +use crate::crypto::{self, mpis::{PublicKey, MPI}}; +use crate::crypto::symmetric::{Decryptor, BufferedReaderDecryptor}; +use crate::message; +use crate::message::MessageValidator; + +mod partial_body; +use self::partial_body::BufferedReaderPartialBodyFilter; + +use crate::packet::signature::subpacket::{ + NotationData, + Subpacket, + SubpacketArea, + SubpacketLength, + SubpacketTag, + SubpacketValue, +}; + +mod packet_pile_parser; +pub use self::packet_pile_parser::PacketPileParser; + +mod hashed_reader; +pub(crate) use self::hashed_reader::HashedReader; + +mod packet_parser_builder; +pub use self::packet_parser_builder::{Dearmor, PacketParserBuilder}; + +pub mod map; +mod mpis; +mod sexp; +pub mod stream; + +// Whether to trace execution by 
default (on stderr). +const TRACE : bool = false; + +/// Parsing of packets and related structures. +/// +/// This is a uniform interface to parse packets, messages, keys, and +/// related data structures. +pub trait Parse<'a, T> { + /// Reads from the given reader. + fn from_reader<R: 'a + Read>(reader: R) -> Result<T>; + + /// Reads from the given file. + /// + /// The default implementation just uses [`from_reader(..)`], but + /// implementations can provide their own specialized version. + /// + /// [`from_reader(..)`]: #tymethod.from_reader + fn from_file<P: AsRef<Path>>(path: P) -> Result<T> + { + Self::from_reader(::std::fs::File::open(path)?) + } + + /// Reads from the given slice. + /// + /// The default implementation just uses [`from_reader(..)`], but + /// implementations can provide their own specialized version. + /// + /// [`from_reader(..)`]: #tymethod.from_reader + fn from_bytes<D: AsRef<[u8]> + ?Sized>(data: &'a D) -> Result<T> { + Self::from_reader(io::Cursor::new(data)) + } +} + +macro_rules! impl_parse_generic_packet { + ($typ: ident) => { + impl<'a> Parse<'a, $typ> for $typ { + fn from_reader<R: 'a + Read>(reader: R) -> Result<Self> { + let bio = buffered_reader::Generic::with_cookie( + reader, None, Cookie::default()); + let parser = PacketHeaderParser::new_naked(bio); + + let mut pp = Self::parse(parser)?; + pp.buffer_unread_content()?; + + match pp.next()? { + (Packet::$typ(o), PacketParserResult::EOF(_)) + => Ok(o), + (p, PacketParserResult::EOF(_)) => + Err(Error::InvalidOperation( + format!("Not a {} packet: {:?}", stringify!($typ), + p)).into()), + (_, PacketParserResult::Some(_)) => + Err(Error::InvalidOperation( + "Excess data after packet".into()).into()), + } + } + } + }; +} + +/// The default amount of acceptable nesting. Typically, we expect a +/// message to look like: +/// +/// [ encryption container: [ signature: [ compressed data: [ literal data ]]]] +/// +/// So, this should be more than enough.
+const MAX_RECURSION_DEPTH : u8 = 16; + +/// The default maximum size of non-container packets. +/// +/// Packets that exceed this limit will be returned as +/// `Packet::Unknown`, with the error set to `Error::PacketTooLarge`. +/// +/// This limit applies to any packet type that is *not* a container +/// packet, i.e. any packet that is not a literal data packet, a +/// compressed data packet, a symmetrically encrypted data packet, or +/// an AEAD encrypted data packet. +/// +/// The default is 1 MiB. +const MAX_PACKET_SIZE: u32 = 1 << 20; // 1 MiB + +// Used to parse an OpenPGP packet's header (note: in this case, the +// header means a Packet's fixed data, not the OpenPGP framing +// information, such as the CTB, and length information). +// +// This struct is not exposed to the user. Instead, when a header has +// been successfully parsed, a `PacketParser` is returned. +pub(crate) struct PacketHeaderParser<T: BufferedReader<Cookie>> { + // The reader stack wrapped in a buffered_reader::Dup so that if + // there is a parse error, we can abort and still return an + // Unknown packet. + reader: buffered_reader::Dup<T, Cookie>, + + // The current packet's header. + header: Header, + header_bytes: Vec<u8>, + + // This packet's path. + path: Vec<usize>, + + // The `PacketParser`'s state. + state: PacketParserState, + + /// A map of this packet. + map: Option<map::Map>, +} + +/// Creates a local macro called php_try! that returns an Unknown +/// packet instead of an Error like try! on parsing-related errors. +/// (Errors like read errors are still returned as usual.) +/// +/// If you want to fail like this in a non-try! context, use +/// php.fail("reason"). +macro_rules! make_php_try { + ($parser:expr) => { + macro_rules!
php_try { + ($e:expr) => { + match $e { + Ok(b) => { + Ok(b) + }, + Err(e) => { + let e = match e.downcast::<io::Error>() { + Ok(e) => + if let io::ErrorKind::UnexpectedEof = e.kind() { + return $parser.error(e.into()); + } else { + e.into() + }, + Err(e) => e, + }; + let e = match e.downcast::<Error>() { + Ok(e) => match e { + Error::MalformedMPI(_) => + return $parser.error(e.into()), + _ => + e.into(), + }, + Err(e) => e, + }; + + Err(e) + }, + }? + }; + } + }; +} + +impl<T: BufferedReader<Cookie>> std::fmt::Debug for PacketHeaderParser<T> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("PacketHeaderParser") + .field("header", &self.header) + .field("path", &self.path) + .field("reader", &self.reader) + .field("state", &self.state) + .field("map", &self.map) + .finish() + } +} + +impl<'a, T: 'a + BufferedReader<Cookie>> PacketHeaderParser<T> { + // Returns a `PacketHeaderParser` to parse an OpenPGP packet. + // `inner` points to the start of the OpenPGP framing information, + // i.e., the CTB. + fn new(inner: T, + state: PacketParserState, + path: Vec<usize>, header: Header, + header_bytes: Vec<u8>) -> Self + { + assert!(path.len() > 0); + + let mut cookie = Cookie::default(); + cookie.level = inner.cookie_ref().level; + let map = if state.settings.map { + Some(map::Map::new(header_bytes.clone())) + } else { + None + }; + PacketHeaderParser { + reader: buffered_reader::Dup::with_cookie(inner, cookie), + header: header, + header_bytes: header_bytes, + path: path, + state: state, + map: map, + } + } + + // Returns a `PacketHeaderParser` that parses a bare packet. That + // is, `inner` points to the start of the packet; the OpenPGP + // framing has already been processed, and `inner` already + // includes any required filters (e.g., a + // `BufferedReaderPartialBodyFilter`, etc.). 
+ fn new_naked(inner: T) -> Self { + PacketHeaderParser::new(inner, + PacketParserState::new(Default::default()), + vec![ 0 ], + Header::new(CTB::new(Tag::Reserved), + BodyLength::Full(0)), + Vec::new()) + } + + // Consumes the bytes belonging to the packet's header (i.e., the + // number of bytes read) from the reader, and returns a + // `PacketParser` that can be returned to the user. + // + // Only call this function if the packet's header has been + // completely and correctly parsed. If a failure occurs while + // parsing the header, use `fail()` instead. + fn ok(mut self, packet: Packet) -> Result<PacketParser<'a>> { + let total_out = self.reader.total_out(); + + let mut reader = if self.state.settings.map { + // Read the body for the map. Note that + // `total_out` does not account for the body. + // + // XXX avoid the extra copy. + let body = self.reader.steal_eof()?; + if body.len() > 0 { + self.field("body", body.len()); + } + + // This is a buffered_reader::Dup, so this always has an + // inner. + let inner = Box::new(self.reader).into_inner().unwrap(); + + // Combine the header with the body for the map. + let mut data = Vec::with_capacity(total_out + body.len()); + // We know that the inner reader must have at least + // `total_out` bytes buffered, otherwise we could never + // have read that much from the `buffered_reader::Dup`. + data.extend_from_slice(&inner.buffer()[..total_out]); + data.extend(body); + self.map.as_mut().unwrap().finalize(data); + + inner + } else { + // This is a buffered_reader::Dup, so this always has an + // inner. + Box::new(self.reader).into_inner().unwrap() + }; + + // We know the data has been read, so this cannot fail. 
+ reader.data_consume_hard(total_out).unwrap(); + + Ok(PacketParser { + header: self.header, + packet: packet, + path: self.path, + last_path: vec![], + reader: reader, + content_was_read: false, + decrypted: true, + finished: false, + map: self.map, + body_hash: None, + state: self.state, + }) + } + + // Something went wrong while parsing the packet's header. Aborts + // and returns an Unknown packet instead. + fn fail(self, reason: &'static str) -> Result<PacketParser<'a>> { + self.error(Error::MalformedPacket(reason.into()).into()) + } + + fn error(mut self, error: anyhow::Error) -> Result<PacketParser<'a>> { + // Rewind the dup reader, so that the caller has a chance to + // buffer the whole body of the unknown packet. + self.reader.rewind(); + Unknown::parse(self, error) + } + + fn field(&mut self, name: &'static str, size: usize) { + if let Some(ref mut map) = self.map { + map.add(name, size) + } + } + + fn parse_u8(&mut self, name: &'static str) -> Result<u8> { + self.field(name, 1); + Ok(self.reader.data_consume_hard(1)?[0]) + } + + fn parse_be_u16(&mut self, name: &'static str) -> Result<u16> { + self.field(name, 2); + Ok(self.reader.read_be_u16()?) + } + + fn parse_be_u32(&mut self, name: &'static str) -> Result<u32> { + self.field(name, 4); + Ok(self.reader.read_be_u32()?) + } + + fn parse_bool(&mut self, name: &'static str) -> Result<bool> { + self.field(name, 1); + let v = self.reader.data_consume_hard(1)?[0]; + match v { + 0 => Ok(false), + 1 => Ok(true), + n => Err(Error::MalformedPacket( + format!("Invalid value for bool: {}", n)).into()), + } + } + + fn parse_bytes(&mut self, name: &'static str, amount: usize) + -> Result<Vec<u8>> { + self.field(name, amount); + Ok(self.reader.steal(amount)?) 
+ } + + fn parse_bytes_eof(&mut self, name: &'static str) -> Result<Vec<u8>> { + let r = self.reader.steal_eof()?; + self.field(name, r.len()); + Ok(r) + } + + fn recursion_depth(&self) -> isize { + self.path.len() as isize - 1 + } +} + + +/// What the hash in the Cookie is for. +#[derive(Copy, Clone, PartialEq, Debug)] +pub(crate) enum HashesFor { + Nothing, + MDC, + Signature, +} + +/// Controls whether or not a hashed reader hashes data. +#[derive(Copy, Clone, PartialEq, Debug)] +enum Hashing { + /// Hashing is enabled. + Enabled, + /// Hashing is enabled for notarized signatures. + Notarized, + /// Hashing is disabled. + Disabled, +} + + +#[derive(Debug)] +pub(crate) struct Cookie { + // `BufferedReader`s managed by a `PacketParser` have + // `Some(level)`; an external `BufferedReader` (i.e., the + // underlying `BufferedReader`) has no level. + // + // Before parsing a top-level packet, we may push a + // `buffered_reader::Limitor` in front of the external + // `BufferedReader`. Such `BufferedReader`s are assigned a level + // of 0. + // + // When a top-level packet (i.e., a packet with a recursion depth + // of 0) reads from the `BufferedReader` stack, the top + // `BufferedReader` will have a level of at most 0. + // + // If the top-level packet is a container, say, a `CompressedData` + // packet, then it pushes a decompression filter with a level of 0 + // onto the `BufferedReader` stack, and it recursively invokes the + // parser. + // + // When the parser encounters the `CompressedData`'s first child, + // say, a `Literal` packet, it pushes a `buffered_reader::Limitor` on + // the `BufferedReader` stack with a level of 1. Then, a + // `PacketParser` for the `Literal` data packet is created with a + // recursion depth of 1. + // + // There are several things to note: + // + // - When a `PacketParser` with a recursion depth of N reads + // from the `BufferedReader` stack, the top `BufferedReader`'s + // level is (at most) N. 
+ // + // - Because we sometimes don't need to push a limitor + // (specifically, when the length is indeterminate), the + // `BufferedReader` at the top of the stack may have a level + // less than the current `PacketParser`'s recursion depth. + // + // - When a packet at depth N is a container that filters the + // data, it pushes a `BufferedReader` at level N onto the + // `BufferedReader` stack. + // + // - When we finish parsing a packet at depth N, we pop all + // `BufferedReader`s from the `BufferedReader` stack that are + // at level N. The intuition is: the `BufferedReaders` at + // level N are associated with the packet at depth N. + // + // - If a OnePassSig packet occurs at the top level, then we + // need to push a HashedReader above the current level. The + // top level is level 0, thus we push the HashedReader at + // level -1. + level: Option<isize>, + + hashes_for: HashesFor, + hashing: Hashing, + + /// Keeps track of whether the last one pass signature packet had + /// the last flag set. + saw_last: bool, + sig_groups: Vec<SignatureGroup>, + /// Keep track of the maximal size of sig_groups to compute + /// signature levels. + sig_groups_max_len: usize, + + /// Stashed bytes that need to be hashed. + /// + /// When checking nested signatures, we need to hash the framing. + /// However, at the time we know that we want to hash it, it has + /// already been consumed. Deferring the consumption of headers + /// failed due to complications with the partial body decoder + /// eagerly consuming data. I (Justus) decided that doing the + /// right thing is not worth the trouble, at least for now. Also, + /// hash stash sounds funny. + hash_stash: Option<Vec<u8>>, + + /// Whether this `BufferedReader` is actually an interior EOF in a + /// container. + /// + /// This is used by the SEIP parser to prevent a child packet from + /// accidentally swallowing the trailing MDC packet. 
This can + /// happen when there is a compressed data packet with an + /// indeterminate body length encoding. In this case, due to + /// buffering, the decompressor consumes data beyond the end of + /// the compressed data. + /// + /// When set, buffered_reader_pop_stack will return early when it + /// encounters a fake EOF at the level it is popping to. + fake_eof: bool, +} + +/// Contains hashes for consecutive one pass signature packets ending +/// in one with the last flag set. +pub(crate) struct SignatureGroup { + /// Counts the number of one pass signature packets this group is + /// for. Once this drops to zero, we pop the group from the + /// stack. + ops_count: usize, + + /// The hash contexts. + pub(crate) hashes: Vec<crypto::hash::Context>, +} + +impl fmt::Debug for SignatureGroup { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let algos = self.hashes.iter().map(|ctx| ctx.algo()) + .collect::<Vec<_>>(); + + f.debug_struct("Cookie") + .field("ops_count", &self.ops_count) + .field("hashes", &algos) + .finish() + } +} + +impl Default for SignatureGroup { + fn default() -> Self { + SignatureGroup { + ops_count: 0, + hashes: Default::default(), + } + } +} + +impl SignatureGroup { + /// Clears the signature group. + fn clear(&mut self) { + self.ops_count = 0; + self.hashes.clear(); + } +} + +impl Default for Cookie { + fn default() -> Self { + Cookie { + level: None, + hashing: Hashing::Enabled, + hashes_for: HashesFor::Nothing, + saw_last: false, + sig_groups: vec![Default::default()], + sig_groups_max_len: 1, + hash_stash: None, + fake_eof: false, + } + } +} + +impl Cookie { + fn new(level: isize) -> Cookie { + Cookie { + level: Some(level), + hashing: Hashing::Enabled, + hashes_for: HashesFor::Nothing, + saw_last: false, + sig_groups: vec![Default::default()], + sig_groups_max_len: 1, + hash_stash: None, + fake_eof: false, + } + } + + /// Returns a reference to the topmost signature group. 
+ pub(crate) fn sig_group(&self) -> &SignatureGroup { + assert!(self.sig_groups.len() > 0); + &self.sig_groups[self.sig_groups.len() - 1] + } + + /// Returns a mutable reference to the topmost signature group. + pub(crate) fn sig_group_mut(&mut self) -> &mut SignatureGroup { + assert!(self.sig_groups.len() > 0); + let len = self.sig_groups.len(); + &mut self.sig_groups[len - 1] + } + + /// Returns the level of the currently parsed signature. + fn signature_level(&self) -> usize { + // The signature with the deepest "nesting" is closest to the + // data, and hence level 0. + self.sig_groups_max_len - self.sig_groups.len() + } + + /// Tests whether the topmost signature group is no longer used. + fn sig_group_unused(&self) -> bool { + assert!(self.sig_groups.len() > 0); + self.sig_groups[self.sig_groups.len() - 1].ops_count == 0 + } + + /// Pushes a new signature group to the stack. + fn sig_group_push(&mut self) { + self.sig_groups.push(Default::default()); + self.sig_groups_max_len += 1; + } + + /// Pops a signature group from the stack. + fn sig_group_pop(&mut self) { + if self.sig_groups.len() == 1 { + // Don't pop the last one, just clear it. + self.sig_groups[0].clear(); + self.hashes_for = HashesFor::Nothing; + } else { + self.sig_groups.pop(); + } + } +} + +impl Cookie { + // Enables or disables signature hashers (HashesFor::Signature) at + // level `level`. + // + // Thus to disable the hashing of a level 3 literal packet's + // meta-data, we disable hashing at level 2. 
+ fn hashing(reader: &mut dyn BufferedReader<Cookie>, + how: Hashing, level: isize) { + let mut reader : Option<&mut dyn BufferedReader<Cookie>> + = Some(reader); + while let Some(r) = reader { + { + let cookie = r.cookie_mut(); + if let Some(br_level) = cookie.level { + if br_level < level { + break; + } + if br_level == level + && cookie.hashes_for == HashesFor::Signature { + cookie.hashing = how; + } + } else { + break; + } + } + reader = r.get_mut(); + } + } + + // A helpful debugging aid to pretty print a Buffered Reader + // stack. + #[allow(dead_code)] + fn dump(reader: &dyn BufferedReader<Cookie>) { + let mut i = 1; + let mut reader : Option<&dyn BufferedReader<Cookie>> = Some(reader); + while let Some(r) = reader { + { + let cookie = r.cookie_ref(); + + eprint!(" {}. {}, level: {:?}", + i, r, cookie.level); + if cookie.hashes_for != HashesFor::Nothing { + eprint!(", hashes for: {:?}", cookie.hashes_for); + } + eprint!("\n"); + } + reader = r.get_ref(); + i = i + 1; + } + } +} + +// Pops readers from a buffered reader stack at the specified level. +fn buffered_reader_stack_pop<'a>( + mut reader: Box<dyn BufferedReader<Cookie> + 'a>, depth: isize) + -> Result<(bool, Box<dyn BufferedReader<Cookie> + 'a>)> +{ + tracer!(TRACE, |