summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal H. Walfield <neal@pep.foundation>2018-07-11 12:02:02 +0200
committerNeal H. Walfield <neal@pep.foundation>2018-07-11 12:02:02 +0200
commita3abfd6bfd399bd4f366f47b527d9cd68957c018 (patch)
tree7f0c08067273c4859e6e7625f4c549a28cd92fc9
parent5b311f141e87c813c2c031894fa07bc6a32da0c0 (diff)
openpgp: Instead of using an ad-hoc parser, use a parser generator.
- Instead of using an ad-hoc parser for recognizing OpenPGP messages, use lalrpop, an LR(1) parser generator.
-rw-r--r--openpgp/Cargo.toml5
-rw-r--r--openpgp/build.rs10
-rw-r--r--openpgp/src/lib.rs2
-rw-r--r--openpgp/src/message/grammar.lalrpop67
-rw-r--r--openpgp/src/message/lexer.rs125
-rw-r--r--openpgp/src/message/mod.rs (renamed from openpgp/src/message.rs)331
6 files changed, 342 insertions, 198 deletions
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml
index acf33a3c..22774d05 100644
--- a/openpgp/Cargo.toml
+++ b/openpgp/Cargo.toml
@@ -2,6 +2,7 @@
name = "openpgp"
version = "0.1.0"
authors = ["Neal H. Walfield <neal@gnu.org>"]
+build = "build.rs"
[dependencies]
buffered-reader = { path = "../buffered-reader", default-features = false }
@@ -9,10 +10,14 @@ base64 = "0.9.0"
bzip2 = { version = "0.3.2", optional = true }
failure = "0.1.1"
flate2 = { version = "1.0.1", optional = true }
+lalrpop-util = "0.15.2"
nettle = { git = "https://gitlab.com/sequoia-pgp/nettle-rs.git" }
quickcheck = "0.6"
time = "0.1.38"
+[build-dependencies]
+lalrpop = "0.15.2"
+
[features]
default = ["compression"]
diff --git a/openpgp/build.rs b/openpgp/build.rs
new file mode 100644
index 00000000..b18f693b
--- /dev/null
+++ b/openpgp/build.rs
@@ -0,0 +1,10 @@
+extern crate lalrpop;
+
+/// Generates the message parser from the lalrpop grammar.
+///
+/// Panics (and thereby fails the build) if lalrpop reports an error.
+fn main() {
+    // Tell cargo which input this script depends on, so that it is
+    // rerun when the grammar changes.  The path is relative to the
+    // package root.
+    println!("cargo:rerun-if-changed=src/message/grammar.lalrpop");
+
+    lalrpop::process_root().unwrap();
+}
diff --git a/openpgp/src/lib.rs b/openpgp/src/lib.rs
index fb727b2e..371cb3a0 100644
--- a/openpgp/src/lib.rs
+++ b/openpgp/src/lib.rs
@@ -40,6 +40,8 @@
#![warn(missing_docs)]
+extern crate lalrpop_util;
+
#[macro_use]
extern crate failure;
diff --git a/openpgp/src/message/grammar.lalrpop b/openpgp/src/message/grammar.lalrpop
new file mode 100644
index 00000000..4eebd18b
--- /dev/null
+++ b/openpgp/src/message/grammar.lalrpop
@@ -0,0 +1,67 @@
+// -*- mode: Rust; -*-
+
+use message::lexer;
+
+grammar;
+
+// The public nonterminal, for which lalrpop generates the parser
+// entry point.  Every production evaluates to `()`: the grammar only
+// decides whether a token sequence is accepted.
+pub Message: () = {
+ LITERAL,
+ CompressedData,
+ EncryptedPart,
+ SignedPart,
+ OPAQUE_CONTENT,
+};
+
+// A compressed data container, the message nested inside it, and the
+// POP token that closes the container.
+CompressedData: () = {
+ COMPRESSED_DATA Message POP
+};
+
+// A SEIP container, the message nested inside it, and the POP token
+// that closes the container.
+SeipPart: () = {
+ SEIP Message POP,
+}
+
+// An encrypted part is 0 or more ESKs followed by a SEIP packet.
+EncryptedPart: () = {
+ SeipPart,
+ ESKS SeipPart,
+};
+
+// One or more ESKs.
+ESKS: () = {
+ ESK,
+ ESKS ESK,
+};
+
+// An encrypted session key: either a PKESK or an SKESK.
+ESK: () = {
+ PKESK,
+ SKESK,
+};
+
+// Signatures bracket a message like so:
+//
+// OPS OPS Message SIG SIG
+//
+// or, there are 1 or more signatures preceding a Message (this is an
+// artifact of old PGP versions):
+//
+// SIG SIG Message
+//
+// Each production below handles a single signature.  Because a
+// Message may itself be a SignedPart, multiple signatures are matched
+// by nesting.
+SignedPart: () = {
+ SIG Message,
+ OPS Message SIG,
+}
+
+// Use an external lexer: locations are `usize`, lexing errors are
+// `lexer::LexicalError`, and each terminal used above corresponds to
+// a variant of `lexer::Token`.
+extern {
+ type Location = usize;
+ type Error = lexer::LexicalError;
+
+ enum lexer::Token {
+ LITERAL => lexer::Token::Literal,
+ COMPRESSED_DATA => lexer::Token::CompressedData,
+ SKESK => lexer::Token::SKESK,
+ PKESK => lexer::Token::PKESK,
+ SEIP => lexer::Token::SEIP,
+ OPS => lexer::Token::OPS,
+ SIG => lexer::Token::SIG,
+ POP => lexer::Token::Pop,
+ OPAQUE_CONTENT => lexer::Token::OpaqueContent,
+ }
+}
diff --git a/openpgp/src/message/lexer.rs b/openpgp/src/message/lexer.rs
new file mode 100644
index 00000000..442399d4
--- /dev/null
+++ b/openpgp/src/message/lexer.rs
@@ -0,0 +1,125 @@
+use Error;
+use Result;
+
+use Packet;
+use PacketPile;
+
+// The type of the parser's input.
+//
+// The parser iterates over tuples consisting of the token's starting
+// position, the token itself, and the token's ending position.  An
+// `Err` item carries a lexing error instead of a token.
+pub(crate) type LexerItem<Tok, Loc, Error>
+ = ::std::result::Result<(Loc, Tok, Loc), Error>;
+
+// The tokens that the message grammar is defined over.
+//
+// When lexing a `PacketPile`, each packet is mapped to the token of
+// the same kind; `Pop` and `OpaqueContent` are synthesized by the
+// lexer.
+#[derive(Debug, Clone)]
+pub enum Token {
+ // A literal data packet.
+ Literal,
+ // A compressed data packet (a container).
+ CompressedData,
+
+ // A symmetric-key encrypted session key packet.
+ SKESK,
+ // A public-key encrypted session key packet.
+ PKESK,
+ // A symmetrically encrypted integrity protected data packet (a
+ // container).
+ SEIP,
+
+ // A one pass signature packet.
+ OPS,
+ // A signature packet.
+ SIG,
+
+ // Marks the end of a container's content.
+ Pop,
+
+ // This represents the content of a container that is not parsed.
+ OpaqueContent,
+}
+
+// The error type that the lexer reports to the parser.  It has no
+// variants, so no value of this type can be constructed.
+#[derive(Debug)]
+pub enum LexicalError {
+ // There are no lexing errors.
+}
+
+// The token source that is handed to the generated parser.  Both
+// variants wrap an iterator over (position, token) pairs.
+pub(crate) enum Lexer<'input> {
+ // The tokens are borrowed; each one is cloned when it is yielded.
+ Refed(Box<Iterator<Item=(usize, &'input Token)> + 'input>),
+ // The tokens are owned by the iterator.
+ Owned(Box<Iterator<Item=(usize, Token)> + 'input>),
+}
+
+impl<'input> Iterator for Lexer<'input> {
+    type Item = LexerItem<Token, usize, LexicalError>;
+
+    /// Returns the next token, or `None` once the input is exhausted.
+    ///
+    /// A token's position is used as both its start and its end
+    /// location.  This never yields an `Err`.
+    fn next(&mut self) -> Option<Self::Item> {
+        let (pos, tok) = match *self {
+            Lexer::Refed(ref mut tokens) => {
+                // The tokens are only borrowed, but the parser wants
+                // owned tokens.
+                let (pos, tok) = tokens.next()?;
+                (pos, tok.clone())
+            }
+            Lexer::Owned(ref mut tokens) => tokens.next()?,
+        };
+
+        Some(Ok((pos, tok, pos)))
+    }
+}
+
+impl<'input> Lexer<'input> {
+    /// Uses a raw sequence of tokens as input to the parser.
+    // Only the test code calls this.  Gating it with cfg(test) would
+    // also require gating the Lexer enum and specializing
+    // Lexer::next(); allowing it to be unused is significantly less
+    // ugly.
+    #[allow(unused)]
+    pub(crate) fn from_tokens(raw: &'input [Token]) -> Self {
+        Lexer::Refed(Box::new(raw.iter().enumerate()))
+    }
+
+    /// Uses a `PacketPile` as input to the parser.
+    ///
+    /// Walks all packets in the pile and emits one token per packet.
+    /// Whenever the walk leaves a container, a `Pop` token is emitted
+    /// for each container that was left.  A container whose content
+    /// was not unpacked contributes `OpaqueContent` followed by
+    /// `Pop`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::MalformedMessage` if the pile contains a
+    /// packet that has no corresponding token.
+    pub(crate) fn from_packet_pile(pp: &'input PacketPile) -> Result<Self> {
+        let mut tokens = Vec::new();
+        // The length of the previously visited packet's path.  A
+        // length of 1 means that no container is open.
+        let mut depth = 1;
+
+        for (path, packet) in pp.descendants().paths() {
+            // If the path got shorter, we popped one or more
+            // containers: emit one Pop per level.
+            for _ in path.len()..depth {
+                tokens.push(Token::Pop);
+            }
+            depth = path.len();
+
+            let (token, is_container) = match packet {
+                Packet::Literal(_) => (Token::Literal, false),
+                Packet::CompressedData(_) => (Token::CompressedData, true),
+                Packet::SKESK(_) => (Token::SKESK, false),
+                Packet::PKESK(_) => (Token::PKESK, false),
+                Packet::SEIP(_) => (Token::SEIP, true),
+                Packet::OnePassSig(_) => (Token::OPS, false),
+                Packet::Signature(_) => (Token::SIG, false),
+
+                other =>
+                    return Err(Error::MalformedMessage(
+                        format!("Invalid OpenPGP message: unexpected packet: {:?}",
+                                other.tag()).into()).into()),
+            };
+            tokens.push(token);
+
+            // If a container's content is not unpacked, then we treat
+            // the content as an opaque message.
+            if is_container
+                && packet.children.is_none() && packet.body.is_some()
+            {
+                tokens.push(Token::OpaqueContent);
+                tokens.push(Token::Pop);
+            }
+        }
+
+        // Close the containers that are still open at the end of the
+        // pile.
+        for _ in 1..depth {
+            tokens.push(Token::Pop);
+        }
+
+        Ok(Lexer::Owned(Box::new(tokens.into_iter().enumerate())))
+    }
+}
diff --git a/openpgp/src/message.rs b/openpgp/src/message/mod.rs
index dc2148a4..0651390a 100644
--- a/openpgp/src/message.rs
+++ b/openpgp/src/message/mod.rs
@@ -1,14 +1,18 @@
use std::fmt;
use std::path::Path;
-use std::iter;
use Result;
use Error;
-use Tag;
use Packet;
use PacketPile;
use Message;
+mod lexer;
+mod grammar;
+
+use self::lexer::Lexer;
+use self::grammar::MessageParser;
+
impl fmt::Debug for Message {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_struct("Message")
@@ -17,200 +21,6 @@ impl fmt::Debug for Message {
}
}
-// This is a helper function to signal that an `PacketPile` is not a
-// `Message`.
-macro_rules! bad {
- ($msg:expr) => ({
- return Err(Error::MalformedMessage(
- format!("Invalid OpenPGP message: {}", $msg.to_string())
- .into()).into())
- });
-}
-
-// The grammar for an encrypt part is:
-//
-// ESK :- Public-Key Encrypted Session Key Packet |
-// Symmetric-Key Encrypted Session Key Packet.
-//
-// ESK Sequence :- ESK | ESK Sequence, ESK.
-//
-// Encrypted Data :- Symmetrically Encrypted Data Packet |
-// Symmetrically Encrypted Integrity Protected Data Packet
-//
-// Encrypted Message :- Encrypted Data | ESK Sequence, Encrypted Data.
-//
-// See https://tools.ietf.org/html/rfc4880#section-11.3
-//
-// In other words: zero or more ESKs followed by exactly one SEIP or
-// SED packet.
-fn is_encrypted_part<'a, I>(mut po: Option<&'a Packet>, mut iter: I,
- depth: usize)
- -> Result<()>
- where I: Iterator<Item=&'a Packet>
-{
- if po.is_none() {
- po = iter.next();
- }
-
- while let Some(p) = po {
- // We match by tag so that we correctly handle Unknown
- // packets.
- match p.tag() {
- Tag::PKESK | Tag::SKESK => (),
-
- tag @ Tag::SEIP | tag @ Tag::SED => {
- // This has to be the last packet.
- let tail : Vec<&Packet> = iter.collect();
- if tail.len() > 0 {
- bad!(format!(
- "{} should be the last packet in an encrypted part, \
- but followed by {} packets ({:?}).",
- tag, tail.len(),
- tail.iter().map(|p| p.tag()).collect::<Vec<Tag>>()));
- }
-
- // XXX: We assume that if a SEIP or SED packet has a
- // body, then the body is encrypted.
- if p.body.is_some() {
- return Ok(());
- } else if let Some(ref children) = p.children {
- return is_message(None, children.children(), depth + 1);
- } else {
- bad!("an encrypted part cannot be empty.");
- }
- },
-
- tag @ _ =>
- bad!(format!("while parsing an encrypted part: \
- unexpected packet ({})",
- tag)),
- }
-
- po = iter.next();
- }
-
- bad!("encrypted part missing a SEIP or SED packet.");
-}
-
-fn is_one_pass_signed_part<'a, I>(mut po: Option<&'a Packet>, mut iter: I,
- depth: usize)
- -> Result<()>
- where I: Iterator<Item=&'a Packet>
-{
- if po.is_none() {
- po = iter.next();
- }
-
- let mut ops = 0;
- let mut saw_message = false;
-
- while let Some(p) = po {
- // We match by tag so that we correctly handle Unknown
- // packets.
- match p.tag() {
- Tag::OnePassSig => {
- if saw_message {
- bad!("One Pass Signature packet should not follow \
- a message.");
- }
- ops += 1;
- },
- Tag::Signature => {
- if !saw_message {
- bad!("Signature packet encountered \
- before a signed message.");
- }
- if ops == 0 {
- bad!("Unbalanced signature: more Signature than \
- One Pass Signature packets.");
- }
- ops -= 1;
- }
- _ => {
- if saw_message {
- bad!("A signature is only allowed over a single message.");
- }
- saw_message = true;
- is_message(Some(p), iter::empty(), depth + 1)?
- },
- }
-
- po = iter.next();
- }
-
- if !(ops == 0 && saw_message) {
- bad!(format!("Unbalanced signature: missing {} signature packets",
- ops));
- }
-
- Ok(())
-}
-
-fn is_message<'a, I>(mut po: Option<&'a Packet>, mut iter: I, depth: usize)
- -> Result<()>
- where I: Iterator<Item=&'a Packet>
-{
- if po.is_none() {
- po = iter.next();
- }
-
- let tag = po.and_then(|p| Some(p.tag()));
-
- match tag {
- None =>
- bad!("an empty message is not a valid OpenPGP message."),
-
- Some(Tag::PublicKey) =>
- bad!("it appears to be a TPK."),
-
- Some(Tag::SecretKey) =>
- bad!("it appears to be a TSK."),
-
- Some(Tag::PKESK) | Some(Tag::SKESK)
- | Some(Tag::SEIP) | Some(Tag::SED) =>
- is_encrypted_part(po, iter, depth + 1),
-
- Some(Tag::OnePassSig) =>
- is_one_pass_signed_part(po, iter, depth + 1),
-
- Some(Tag::Signature) => {
- // Signature Packet, OpenPGP Message
- is_message(None, iter, depth + 1)
- },
-
- Some(Tag::CompressedData) => {
- if iter.next().is_some() {
- bad!("a compressed packet may not be \
- followed by another packet.");
- }
-
- let p = po.unwrap();
- if p.body.is_some() {
- // XXX: The body is still compressed. Assume it is
- // okay.
- Ok(())
- } else if let Some(ref children) = p.children {
- is_message(None, children.children(), depth + 1)
- } else {
- bad!("empty compressed data packet.");
- }
- },
-
- Some(Tag::Literal) => {
- if iter.next().is_some() {
- bad!("a literal packet may not be \
- followed by another packet.");
- }
-
- Ok(())
- },
-
- _ => {
- bad!(format!("{:?} is invalid.", tag));
- },
- }
-}
-
impl Message {
/// Converts the `PacketPile` to a `Message`.
///
@@ -225,8 +35,15 @@ impl Message {
///
/// [Section 11.3 of RFC 4880]: https://tools.ietf.org/html/rfc4880#section-11.3
pub fn from_packet_pile(pile: PacketPile) -> Result<Self> {
- is_message(None, pile.children(), 0)
- .and_then(|_| Ok(Message { pile: pile } ))
+ // Tokenize the pile and check the tokens against the grammar.
+ let r = MessageParser::new().parse(Lexer::from_packet_pile(&pile)?);
+ match r {
+ Ok(_) => Ok(Message { pile: pile }),
+ // We really want to squash the parser's error: it is an
+ // internal detail that may change, and meaningless even
+ // to an immediate user of this crate.
+ Err(err) => Err(Error::MalformedMessage(
+ format!("Invalid OpenPGP message: {:?}", err).into()).into())
+ }
}
/// Converts the vector of `Packets` to a `Message`.
@@ -265,6 +82,7 @@ mod tests {
use SignatureType;
use s2k::S2K;
use mpis::MPIs;
+ use Tag;
use CompressedData;
use Literal;
use OnePassSig;
@@ -276,6 +94,123 @@ mod tests {
use Container;
#[test]
+    fn tokens() {
+        use self::lexer::{Token, Lexer};
+        use self::lexer::Token::*;
+        use self::grammar::MessageParser;
+
+        // A token sequence and whether the grammar should accept it.
+        struct Case<'a> {
+            tokens: &'a [Token],
+            accept: bool,
+        }
+
+        let cases = [
+            // Literals and (nested) containers.
+            Case { tokens: &[Literal][..], accept: true },
+            Case { tokens: &[CompressedData, Literal, Pop], accept: true },
+            Case { tokens: &[CompressedData, CompressedData, Literal,
+                             Pop, Pop],
+                   accept: true },
+            Case { tokens: &[SEIP, Literal, Pop], accept: true },
+            Case { tokens: &[CompressedData, SEIP, Literal, Pop, Pop],
+                   accept: true },
+            Case { tokens: &[CompressedData, SEIP, CompressedData, Literal,
+                             Pop, Pop, Pop],
+                   accept: true },
+            Case { tokens: &[SEIP, Pop], accept: false },
+
+            // Encrypted session keys preceding a SEIP.
+            Case { tokens: &[SKESK, SEIP, Literal, Pop], accept: true },
+            Case { tokens: &[PKESK, SEIP, Literal, Pop], accept: true },
+            Case { tokens: &[SKESK, SKESK, SEIP, Literal, Pop],
+                   accept: true },
+
+            // Signatures.
+            Case { tokens: &[OPS, Literal, SIG], accept: true },
+            Case { tokens: &[OPS, OPS, Literal, SIG, SIG], accept: true },
+            Case { tokens: &[OPS, OPS, Literal, SIG], accept: false },
+            Case { tokens: &[OPS, OPS, SEIP, OPS, SEIP, Literal, Pop,
+                             SIG, Pop, SIG, SIG],
+                   accept: true },
+
+            // Containers whose content was not unpacked.
+            Case { tokens: &[CompressedData, OpaqueContent], accept: false },
+            Case { tokens: &[CompressedData, OpaqueContent, Pop],
+                   accept: true },
+            Case { tokens: &[CompressedData, CompressedData, OpaqueContent,
+                             Pop, Pop],
+                   accept: true },
+            Case { tokens: &[SEIP, CompressedData, OpaqueContent, Pop, Pop],
+                   accept: true },
+            Case { tokens: &[SEIP, OpaqueContent, Pop], accept: true },
+        ];
+
+        for case in cases.iter() {
+            eprintln!("Parsing: {:?}", case.tokens);
+
+            let outcome
+                = MessageParser::new().parse(Lexer::from_tokens(case.tokens));
+            match outcome {
+                Ok(r) => {
+                    let verdict
+                        = if case.accept { "(expected)" } else { "UNEXPECTED!" };
+                    println!("Parsed as {:?} {}", r, verdict);
+                    assert!(case.accept);
+                },
+                Err(e) => {
+                    let verdict
+                        = if case.accept { "UNEXPECTED!" } else { "(expected)" };
+                    println!("Parse error: {:?} {}", e, verdict);
+                    assert!(! case.accept);
+                }
+            }
+        }
+    }
+
+ #[test]
fn basic() {
// Empty.
// => bad.