From 3367828e487fa79d2ec0a615e6b15dd5fbee5c9a Mon Sep 17 00:00:00 2001
From: "Neal H. Walfield" <neal@pep.foundation>
Date: Fri, 8 Jan 2021 14:29:22 +0100
Subject: openpgp: Add regex support.

  - Fixes #188.
---
 Cargo.lock                        |    1 +
 openpgp/Cargo.toml                |    1 +
 openpgp/src/lib.rs                |    1 +
 openpgp/src/regex/grammar.lalrpop |  204 ++++
 openpgp/src/regex/lexer.rs        |  222 +++++
 openpgp/src/regex/mod.rs          | 1943 +++++++++++++++++++++++++++++++++++++
 6 files changed, 2372 insertions(+)
 create mode 100644 openpgp/src/regex/grammar.lalrpop
 create mode 100644 openpgp/src/regex/lexer.rs
 create mode 100644 openpgp/src/regex/mod.rs
diff --git a/Cargo.lock b/Cargo.lock
index 4bbfe62d..9d5a238f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2027,6 +2027,7 @@ dependencies = [
  "quickcheck",
  "rand",
  "regex",
+ "regex-syntax",
  "rpassword",
  "sha1collisiondetection",
  "thiserror",
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml
index a56ce401..35827295 100644
--- a/openpgp/Cargo.toml
+++ b/openpgp/Cargo.toml
@@ -38,6 +38,7 @@ libc = "0.2.66"
 memsec = { version = ">=0.5", default-features = false }
 nettle = { version = "7", optional = true }
 regex = "1"
+regex-syntax = "0.6"
 sha1collisiondetection = { version = "0.2.3", default-features = false, features = ["std"] }
 thiserror = "1.0.2"
 backtrace = "0.3.3"
diff --git a/openpgp/src/lib.rs b/openpgp/src/lib.rs
index 7670f736..da44fb4c 100644
--- a/openpgp/src/lib.rs
+++ b/openpgp/src/lib.rs
@@ -159,6 +159,7 @@ mod keyid;
 pub use keyid::KeyID;
 mod keyhandle;
 pub use keyhandle::KeyHandle;
+pub mod regex;
 pub mod policy;
 
 pub(crate) mod seal;
diff --git a/openpgp/src/regex/grammar.lalrpop b/openpgp/src/regex/grammar.lalrpop
new file mode 100644
index 00000000..e9e619b5
--- /dev/null
+++ b/openpgp/src/regex/grammar.lalrpop
@@ -0,0 +1,204 @@
+// -*- mode: Rust; -*-
+
+use super::generate_class;
+use super::lexer;
+use super::lexer::{Token, LexicalError};
+use regex_syntax::hir::{self, Hir};
+
+// Pass in the original, untokenized input to facilitate error
+// recovery.
+grammar<'input>(input: &'input str);
+
+// This is a straightforward translation of the regular expression
+// grammar from section 8 of RFC 4880.
+//
+// https://tools.ietf.org/html/rfc4880#section-8
+pub(crate) Regex : Hir = {
+    <l:LBranch> <r:RBranch*> => {
+        let mut r = r;
+        r.insert(0, l);
+        Hir::alternation(r)
+    },
+}
+
+LBranch : Hir = {
+    Branch,
+}
+
+RBranch : Hir = {
+    PIPE <Branch>,
+}
+
+Branch : Hir = {
+    <p:Piece*> => {
+        hir::Hir::group(hir::Group {
+            kind: hir::GroupKind::NonCapturing,
+            hir: Box::new(hir::Hir::concat(p)),
+        })
+    },
+}
+
+Piece : Hir = {
+    <a:Atom> => a,
+    <a:Atom> STAR => {
+        hir::Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
+            greedy: true,
+            hir: Box::new(a)
+        })
+    },
+    <a:Atom> PLUS => {
+        hir::Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::OneOrMore,
+            greedy: true,
+            hir: Box::new(a)
+        })
+    },
+    <a:Atom> QUESTION => {
+        hir::Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrOne,
+            greedy: true,
+            hir: Box::new(a)
+        })
+    },
+}
+
+Atom : Hir = {
+    LPAREN <r:Regex> RPAREN => {
+        hir::Hir::group(hir::Group {
+            kind: hir::GroupKind::NonCapturing,
+            hir: Box::new(r),
+        })
+    },
+
+    Range,
+
+    DOT => {
+        hir::Hir::any(false)
+    },
+    CARET => {
+        hir::Hir::anchor(hir::Anchor::StartText)
+    },
+    DOLLAR => {
+        hir::Hir::anchor(hir::Anchor::EndText)
+    },
+
+    BACKSLASH <t:AnyChar> => {
+        hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+    },
+
+    <t:OTHER> => {
+        hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+    },
+
+}
+
+Range : Hir = {
+    LBRACKET <c:CARET?> <class1:RBRACKET> <class2:NotRBracket*> RBRACKET => {
+        generate_class(c.is_some(),
+                       std::iter::once(class1.to_char())
+                       .chain(class2.into_iter().map(|t| t.to_char())))
+    },
+    LBRACKET CARET <class:NotRBracket+> RBRACKET => {
+        generate_class(true,
+                       class.into_iter().map(|t| t.to_char()))
+    },
+    LBRACKET <class1:NotCaretNotRBracket> <class2:NotRBracket*> RBRACKET => {
+        generate_class(false,
+                       std::iter::once(class1.to_char())
+                       .chain(class2.into_iter().map(|t| t.to_char())))
+    },
+}
+
+NotRBracket : Token = {
+    PIPE => Token::OTHER('|'),
+
+    STAR => Token::OTHER('*'),
+    PLUS => Token::OTHER('+'),
+    QUESTION => Token::OTHER('?'),
+
+    LPAREN => Token::OTHER('('),
+    RPAREN => Token::OTHER(')'),
+
+    DOT => Token::OTHER('.'),
+    CARET => Token::OTHER('^'),
+    DOLLAR => Token::OTHER('$'),
+    BACKSLASH => Token::OTHER('\\'),
+
+    LBRACKET => Token::OTHER('['),
+    // RBRACKET => Token::OTHER(']'),
+    DASH => Token::OTHER('-'),
+
+    OTHER,
+}
+
+NotCaretNotRBracket : Token = {
+    PIPE => Token::OTHER('|'),
+
+    STAR => Token::OTHER('*'),
+    PLUS => Token::OTHER('+'),
+    QUESTION => Token::OTHER('?'),
+
+    LPAREN => Token::OTHER('('),
+    RPAREN => Token::OTHER(')'),
+
+    DOT => Token::OTHER('.'),
+    // CARET => Token::OTHER('^'),
+    DOLLAR => Token::OTHER('$'),
+    BACKSLASH => Token::OTHER('\\'),
+
+    LBRACKET => Token::OTHER('['),
+    // RBRACKET => Token::OTHER(']'),
+    DASH => Token::OTHER('-'),
+
+    OTHER,
+}
+
+AnyChar : Token = {
+    PIPE => Token::OTHER('|'),
+
+    STAR => Token::OTHER('*'),
+    PLUS => Token::OTHER('+'),
+    QUESTION => Token::OTHER('?'),
+
+    LPAREN => Token::OTHER('('),
+    RPAREN => Token::OTHER(')'),
+
+    DOT => Token::OTHER('.'),
+    CARET => Token::OTHER('^'),
+    DOLLAR => Token::OTHER('$'),
+    BACKSLASH => Token::OTHER('\\'),
+
+    LBRACKET => Token::OTHER('['),
+    RBRACKET => Token::OTHER(']'),
+    DASH => Token::OTHER('-'),
+
+    OTHER,
+}
+
+extern {
+    type Location = usize;
+    type Error = LexicalError;
+
+    enum lexer::Token {
+        PIPE => lexer::Token::PIPE,
+
+        STAR => lexer::Token::STAR,
+        PLUS => lexer::Token::PLUS,
+        QUESTION => lexer::Token::QUESTION,
+
+        LPAREN => lexer::Token::LPAREN,
+        RPAREN => lexer::Token::RPAREN,
+
+        DOT => lexer::Token::DOT,
+        CARET => lexer::Token::CARET,
+        DOLLAR => lexer::Token::DOLLAR,
+        BACKSLASH => lexer::Token::BACKSLASH,
+
+        LBRACKET => lexer::Token::LBRACKET,
+        RBRACKET => lexer::Token::RBRACKET,
+        DASH => lexer::Token::DASH,
+
+        OTHER => lexer::Token::OTHER(_),
+    }
+}
diff --git a/openpgp/src/regex/lexer.rs b/openpgp/src/regex/lexer.rs
new file mode 100644
index 00000000..18300d90
--- /dev/null
+++ b/openpgp/src/regex/lexer.rs
@@ -0,0 +1,222 @@
+use std::fmt;
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum LexicalError {
+}
+
+impl fmt::Display for LexicalError {
+    // This trait requires `fmt` with this exact signature.
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{}", "{}")
+    }
+}
+
+pub type Spanned<Token, Loc, LexicalError>
+    = Result<(Loc, Token, Loc), LexicalError>;
+
+// The type of the parser's input.
+//
+// The parser iterators over tuples consisting of the token's starting
+// position, the token itself, and the token's ending position.
+pub(crate) type LexerItem<Token, Loc, LexicalError>
+    = Spanned<Token, Loc, LexicalError>;
+
+/// The components of an OpenPGP Message.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token {
+    PIPE,
+
+    STAR,
+    PLUS,
+    QUESTION,
+
+    LPAREN,
+    RPAREN,
+
+    DOT,
+    CARET,
+    DOLLAR,
+    BACKSLASH,
+
+    LBRACKET,
+    RBRACKET,
+    DASH,
+
+    OTHER(char),
+}
+assert_send_and_sync!(Token);
+
+impl fmt::Display for Token {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(&format!("{:?}", self)[..])
+    }
+}
+
+impl From<Token> for String {
+    fn from(t: Token) -> String {
+        use self::Token::*;
+        match t {
+            PIPE => '|'.to_string(),
+            STAR => '*'.to_string(),
+            PLUS => '+'.to_string(),
+            QUESTION => '?'.to_string(),
+            LPAREN => '('.to_string(),
+            RPAREN => ')'.to_string(),
+            DOT => '.'.to_string(),
+            CARET => '^'.to_string(),
+            DOLLAR => '$'.to_string(),
+            BACKSLASH => '\\'.to_string(),
+            LBRACKET => '['.to_string(),
+            RBRACKET => ']'.to_string(),
+            DASH => '-'.to_string(),
+            OTHER(c) => c.to_string(),
+        }
+    }
+}
+
+impl Token {
+    pub fn to_string(self) -> String {
+        self.into()
+    }
+
+    pub fn to_char(&self) -> char {
+        use self::Token::*;
+        match self {
+            PIPE => '|',
+            STAR => '*',
+            PLUS => '+',
+            QUESTION => '?',
+            LPAREN => '(',
+            RPAREN => ')',
+            DOT => '.',
+            CARET => '^',
+            DOLLAR => '$',
+            BACKSLASH => '\\',
+            LBRACKET => '[',
+            RBRACKET => ']',
+            DASH => '-',
+            OTHER(c) => *c,
+        }
+    }
+}
+
+pub(crate) struct Lexer<'input> {
+    offset: usize,
+    input: &'input str,
+}
+
+impl<'input> Lexer<'input> {
+    pub fn new(input: &'input str) -> Self {
+        Lexer { offset: 0, input }
+    }
+}
+
+impl<'input> Iterator for Lexer<'input> {
+    type Item = LexerItem<Token, usize, LexicalError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        use self::Token::*;
+
+        tracer!(super::TRACE, "regex::Lexer::next");
+
+        // Returns the length of the first character in s in bytes.
+        // If s is empty, returns 0.
+        fn char_bytes(s: &str) -> usize {
+            if let Some(c) = s.chars().next() {
+                c.len_utf8()
+            } else {
+                0
+            }
+        }
+
+        let one = |input: &'input str| -> Option<Token> {
+            let c = input.chars().next()?;
+            Some(match c {
+                '|' => PIPE,
+                '*' => STAR,
+                '+' => PLUS,
+                '?' => QUESTION,
+                '(' => LPAREN,
+                ')' => RPAREN,
+                '.' => DOT,
+                '^' => CARET,
+                '$' => DOLLAR,
+                '\\' => BACKSLASH,
+                '[' => LBRACKET,
+                ']' => RBRACKET,
+                '-' => DASH,
+                _ => OTHER(c),
+            })
+        };
+
+        let l = char_bytes(self.input);
+        let t = match one(self.input) {
+            Some(t) => t,
+            None => return None,
+        };
+
+        self.input = &self.input[l..];
+
+        let start = self.offset;
+        let end = start + l;
+        self.offset += l;
+
+        t!("Returning token at offset {}: '{:?}'",
+           start, t);
+
+        Some(Ok((start, t, end)))
+    }
+}
+
+impl<'input> From<&'input str> for Lexer<'input> {
+    fn from(i: &'input str) -> Lexer<'input> {
+        Lexer::new(i)
+    }
+}
+
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn lexer() {
+        fn lex(s: &str, expected: &[Token]) {
+            let tokens: Vec<Token> = Lexer::new(s)
+                .map(|t| t.unwrap().1)
+                .collect();
+
+            assert_eq!(&tokens[..], expected,
+                       "{}", s);
+        }
+
+        use Token::*;
+        lex("|", &[ PIPE ]);
+        lex("*", &[ STAR ]);
+        lex("+", &[ PLUS ]);
+        lex("?", &[ QUESTION ]);
+        lex("(", &[ LPAREN ]);
+        lex(")", &[ RPAREN ]);
+        lex(".", &[ DOT ]);
+        lex("^", &[ CARET ]);
+        lex("$", &[ DOLLAR ]);
+        lex("\\", &[ BACKSLASH ]);
+        lex("[", &[ LBRACKET ]);
+        lex("]", &[ RBRACKET ]);
+        lex("-", &[ DASH ]);
+        lex("a", &[ OTHER('a') ]);
+        lex("aa", &[ OTHER('a'), OTHER('a') ]);
+        lex("foo", &[ OTHER('f'), OTHER('o'), OTHER('o') ]);
+
+        lex("foo\\bar", &[ OTHER('f'), OTHER('o'), OTHER('o'),
+                           BACKSLASH,
+                           OTHER('b'), OTHER('a'), OTHER('r') ]);
+        lex("*?!", &[ STAR, QUESTION, OTHER('!') ]);
+
+        // Multi-byte UTF-8.
+        lex("ßℝ💣", &[ OTHER('ß'), OTHER('ℝ'), OTHER('💣'), ]);
+        lex("(ß|ℝ|💣",
+            &[ LPAREN, OTHER('ß'), PIPE, OTHER('ℝ'), PIPE, OTHER('💣') ]);
+        lex("東京", &[ OTHER('東'), OTHER('京') ]);
+    }
+}
diff --git a/openpgp/src/regex/mod.rs b/openpgp/src/regex/mod.rs
new file mode 100644
index 00000000..626e5190
--- /dev/null
+++ b/openpgp/src/regex/mod.rs
@@ -0,0 +1,1943 @@
+//! OpenPGP regex parser.
+//!
+//! OpenPGP defines a [regular expression language].  It is used with
+//! [trust signatures] to scope the trust that they extend.
+//!
+//!   [regular expression language]: https://tools.ietf.org/html/rfc4880#section-8
+//!   [trust signatures]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+//!
+//! Compared with most regular expression lanugages, OpenPGP's is
+//! quite simple.  In particular, it only includes the following
+//! features:
+//!
+//!   - Alternations using `|`,
+//!   - Grouping using `(` and `)`,
+//!   - The `*`, `+`, and `?` glob operators,
+//!   - The `^`, and `$` anchors,
+//!   - The '.' operator, positive *non-empty* ranges
+//!     (e.g. `[a-zA-Z]`) and negative *non-empty* ranges (`[^@]`), and
+//!   - The backslash operator to escape special characters (except
+//!     in ranges).
+//!
+//! The regular expression engine defined in this module implements
+//! that language with two differences.  The first difference is that
+//! the compiler only works on UTF-8 strings (not bytes).  The second
+//! difference is that ranges in character classes are between UTF-8
+//! characters, not just ASCII characters.
+//!
+//! # Data Structures
+//!
+//! This module defines two data structures.  [`Regex`] encapsulates a
+//! valid regular expression, and provides methods to check whether
+//! the regular expression matches a string or a [`UserID`].
+//! [`RegexSet`] is similar, but encapsulates zero or more regular
+//! expressions, which may or may not be valid.  Its match methods
+//! return `true` if there are no regular expressions, or, if there is
+//! at least one regular expression, they return whether at least one
+//! of the regular expressions matches it.  `RegexSet`'s matcher
+//! handles invalid regular expressions by considering them to be
+//! regular expressions that don't match anything.  These semantics
+//! are consistent with a trust signature's scoping rules.  Further,
+//! strings that contain control characters never match.  This
+//! behavior can be overridden using [`Regex::disable_sanitizations`]
+//! and [`RegexSet::disable_sanitizations`].
+//!
+//!   [`Regex`]: struct.Regex.html
+//!   [`UserID`]: ../packet/struct.UserID.html
+//!   [`RegexSet`]: struct.RegexSet.html
+//!   [`Regex::disable_sanitizations`]: struct.Regex.html#method.disable_sanitizations
+//!   [`RegexSet::disable_sanitizations`]: struct.RegexSet.html#method.disable_sanitizations
+//!
+//! # Scoped Trust Signatures
+//!
+//! To create a trust signature, you create a signature whose [type]
+//! is either [GenericCertification], [PersonaCertification],
+//! [CasualCertification], or [PositiveCertification], and add a
+//! [Trust Signature] subpacket using, for instance, the
+//! [`SignatureBuilder::set_trust_signature`] method.
+//!
+//!   [type]: https://tools.ietf.org/html/rfc4880#section-5.2.1
+//!   [GenericCertification]: ../types/enum.SignatureType.html#variant.GenericCertification
+//!   [PersonaCertification]: ../types/enum.SignatureType.html#variant.PersonaCertification
+//!   [CasualCertification]: ../types/enum.SignatureType.html#variant.CasualCertification
+//!   [PositiveCertification]: ../types/enum.SignatureType.html#variant.PositiveCertification
+//!   [Trust Signature]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+//!   [`SignatureBuilder::set_trust_signature`]: ../packet/signature/struct.SignatureBuilder.html#method.set_trust_signature
+//!
+//! To scope a trust signature, you add a [Regular Expression
+//! subpacket] to it using
+//! [`SignatureBuilder::set_regular_expression`] or
+//! [`SignatureBuilder::add_regular_expression`].
+//!
+//! To extract any regular expressions, you can use
+//! [`SignatureBuilder::regular_expressions`].
+//!
+//!   [Regular Expression subpacket]: https://tools.ietf.org/html/rfc4880#section-5.2.3.14
+//!   [`SignatureBuilder::set_regular_expression`]: ../packet/signature/struct.SignatureBuilder.html#method.set_regular_expression
+//!   [`SignatureBuilder::add_regular_expression`]: ../packet/signature/struct.SignatureBuilder.html#method.add_regular_expression
+//!   [`SignatureBuilder::regular_expressions`]: ../packet/signature/struct.SignatureBuilder.html#method.regular_expressions
+//!
+//! # Caveat Emptor
+//!
+//! Note: GnuPG has [very limited regular expression support].  In
+//! particular, it only recognizes regular expressions with the
+//! following form:
+//!
+//!   [very limited regular expression support]: https://dev.gnupg.org/source/gnupg/browse/master/g10/trustdb.c;15e065dee891eef9545556f210b4199107999869$1558
+//!
+//! ```text
+//! <[^>]+[@.]example\.com>$
+//! ```
+//!
+//! Further, it escapes any operators between the `<[^>]+[@.]` and the
+//! `>$` except `.` and `\`.  Otherwise, GnuPG treats the regular
+//! expression as a literal domain (e.g., `example.com`).
+//!
+//! Further, until [version 2.2.22] (released in August 2020), GnuPG
+//! did not support regular expressions on Windows, and other systems
+//! that don't include `regcomp`.  On these systems, if a trust
+//! signature included a regular expression, GnuPG conservatively
+//! considered the whole trust signature to match nothing.
+//!
+//!   [version 2.2.22]: https://dev.gnupg.org/T5030
+//!
+//! # Examples
+//!
+//! A CA signs two certificates, one for Alice, who works at
+//! `example.com`, and one for Bob, who is associated with `some.org`.
+//! Carol then creates a trust signature for the CA, which she scopes
+//! to `example.org` and `example.com`.  We then confirm that Carol
+//! can use the CA to authenticate Alice, but not Bob.
+//!
+//! ```
+//! use sequoia_openpgp as openpgp;
+//! use openpgp::cert::prelude::*;
+//! use openpgp::packet::prelude::*;
+//! use openpgp::policy::StandardPolicy;
+//! use openpgp::regex::RegexSet;
+//! use openpgp::types::SignatureType;
+//!
+//! # fn main() -> openpgp::Result<()> {
+//! let p = &StandardPolicy::new();
+//!
+//! let (ca, _)
+//!     = CertBuilder::general_purpose(None, Some("OpenPGP CA <openpgp-ca@example.com>"))
+//!         .generate()?;
+//! let mut ca_signer = ca.primary_key().key().clone()
+//!     .parts_into_secret()?.into_keypair()?;
+//! let ca_userid = ca.with_policy(p, None)?
+//!     .userids().nth(0).expect("Added a User ID").userid();
+//!
+//! // The CA certifies "Alice <alice@example.com>".
+//! let (alice, _)
+//!     = CertBuilder::general_purpose(None, Some("Alice <alice@example.com>"))
+//!         .generate()?;
+//! let alice_userid = alice.with_policy(p, None)?
+//!     .userids().nth(0).expect("Added a User ID").userid();
+//! let alice_certification = SignatureBuilder::new(SignatureType::GenericCertification)
+//!     .sign_userid_binding(
+//!         &mut ca_signer,
+//!         alice.primary_key().component(),
+//!         alice_userid)?;
+//! let alice = alice.insert_packets(alice_certification.clone())?;
+//! # assert!(alice.clone().into_packets().any(|p| {
+//! #   match p {
+//! #       Packet::Signature(sig) => sig == alice_certification,
+//! #       _ => false,
+//! #   }
+//! # }));
+//!
+//! // The CA certifies "Bob <bob@some.org>".
+//! let (bob, _)
+//!     = CertBuilder::general_purpose(None, Some("Bob <bob@some.org>"))
+//!         .generate()?;
+//! let bob_userid = bob.with_policy(p, None)?
+//!     .userids().nth(0).expect("Added a User ID").userid();
+//! let bob_certification = SignatureBuilder::new(SignatureType::GenericCertification)
+//!     .sign_userid_binding(
+//!         &mut ca_signer,
+//!         bob.primary_key().component(),
+//!         bob_userid)?;
+//! let bob = bob.insert_packets(bob_certification.clone())?;
+//! # assert!(bob.clone().into_packets().any(|p| {
+//! #   match p {
+//! #       Packet::Signature(sig) => sig == bob_certification,
+//! #       _ => false,
+//! #   }
+//! # }));
+//!
+//!
+//! // Carol tsigns the CA's certificate.
+//! let (carol, _)
+//!     = CertBuilder::general_purpose(None, Some("Carol <carol@another.net>"))
+//!         .generate()?;
+//! let mut carol_signer = carol.primary_key().key().clone()
+//!     .parts_into_secret()?.into_keypair()?;
+//!
+//! let ca_tsig = SignatureBuilder::new(SignatureType::GenericCertification)
+//!     .set_trust_signature(2, 120)?
+//!     .set_regular_expression("<[^>]+[@.]example\\.org>$")?
+//!     .add_regular_expression("<[^>]+[@.]example\\.com>$")?
+//!     .sign_userid_binding(
+//!         &mut carol_signer,
+//!         ca.primary_key().component(),
+//!         ca_userid)?;
+//! let ca = ca.insert_packets(ca_tsig.clone())?;
+//! # assert!(ca.clone().into_packets().any(|p| {
+//! #   match p {
+//! #       Packet::Signature(sig) => sig == ca_tsig,
+//! #       _ => false,
+//! #   }
+//! # }));
+//!
+//!
+//! // Carol now tries to authenticate Alice and Bob's certificates
+//! // using the CA as a trusted introducer based on `ca_tsig`.
+//! let res = RegexSet::from_signature(&ca_tsig)?;
+//!
+//! // Should should be able to authenticate Alice.
+//! let alice_ua = alice.with_policy(p, None)?
+//!     .userids().nth(0).expect("Added a User ID");
+//! # assert!(res.matches_userid(&alice_ua));
+//! let mut authenticated = false;
+//! for c in alice_ua.certifications() {
+//!     if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
+//!         if c.clone().verify_userid_binding(
+//!             ca.primary_key().key(),
+//!             alice.primary_key().key(),
+//!             alice_ua.userid()).is_ok()
+//!         {
+//!             authenticated |= res.matches_userid(&alice_ua);
+//!         }
+//!     }
+//! }
+//! assert!(authenticated);
+//!
+//! // But, although the CA has certified Bob's key, Carol doesn't rely
+//! // on it, because Bob's email address ("bob@some.org") is out of
+//! // scope (some.org, not example.com).
+//! let bob_ua = bob.with_policy(p, None)?
+//!     .userids().nth(0).expect("Added a User ID");
+//! # assert!(! res.matches_userid(&bob_ua));
+//! let mut have_certification = false;
+//! let mut authenticated = false;
+//! for c in bob_ua.certifications() {
+//!     if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
+//!         if c.clone().verify_userid_binding(
+//!             ca.primary_key().key(),
+//!             bob.primary_key().key(),
+//!             bob_ua.userid()).is_ok()
+//!         {
+//!             have_certification = true;
+//!             authenticated |= res.matches_userid(&bob_ua);
+//!         }
+//!     }
+//! }
+//! assert!(have_certification);
+//! assert!(! authenticated);
+//! # Ok(()) }
+//! ```
+
+use std::borrow::Borrow;
+
+use lalrpop_util::ParseError;
+use regex_syntax::hir::{self, Hir};
+use regex;
+
+use crate::Error;
+use crate::Result;
+use crate::packet::prelude::*;
+use crate::types::SignatureType;
+
+pub(crate) mod lexer;
+lalrpop_util::lalrpop_mod!(
+    #[allow(clippy::all)]
+    #[allow(unused_parens)]
+    grammar,
+    "/regex/grammar.rs"
+);
+
+pub(crate) use self::lexer::Token;
+pub(crate) use self::lexer::{Lexer, LexicalError};
+
+const TRACE: bool = false;
+
+// Convert tokens into strings.
+//
+// Unfortunately, we can't implement From, because we don't define
+// ParseError in this crate.
+pub(crate) fn parse_error_downcast(e: ParseError<usize, Token, LexicalError>)
+    -> ParseError<usize, String, LexicalError>
+{
+    match e {
+        ParseError::UnrecognizedToken {
+            token: (start, t, end),
+            expected,
+        } => ParseError::UnrecognizedToken {
+            token: (start, t.into(), end),
+            expected,
+        },
+
+        ParseError::ExtraToken {
+            token: (start, t, end),
+        } => ParseError::ExtraToken {
+            token: (start, t.into(), end),
+        },
+
+        ParseError::InvalidToken { location }
+        => ParseError::InvalidToken { location },
+
+        ParseError::User { error }
+        => ParseError::User { error },
+
+        ParseError::UnrecognizedEOF { location, expected }
+        => ParseError::UnrecognizedEOF { location, expected },
+    }
+}
+
+// Used by grammar.lalrpop to generate a regex class (e.g. '[a-ce]').
+fn generate_class(caret: bool, chars: impl Iterator<Item=char>) -> Hir
+{
+    tracer!(TRACE, "generate_class");
+
+    // Dealing with ranges is a bit tricky.  We need to examine three
+    // tokens.  If the middle one is a dash, it's a range.
+
+    let chars: Vec<Option<char>> = chars
+        // Pad it out so what we can use windows to get three
+        // characters at a time, and be sure to process all
+        // characters.
+        .map(|c| Some(c))
+        .chain(std::iter::once(None))
+        .chain(std::iter::once(None))
+        .collect();
+    if chars.len() == 2 {
+        // The grammar doesn't allow an empty class.
+        unreachable!();
+    } else {
+        let r = chars
+            .windows(3)
+            .scan(0,
+                  |skip: &mut usize, x: &[Option<char>]|
+                      // Scan stops if the result is None.
+                      // filter_map keeps only those elements that
+                      // are Some.
+                      -> Option<Option<hir::ClassUnicodeRange>>
+                  {
+                      if *skip > 0 {
+                          *skip -= 1;
+                          t!("Skipping: {:?} (skip now: {})", x, skip);
+                          Some(None)
+                      } else {
+                          match (x[0], x[1], x[2]) {
+                              (Some(a), Some('-'), Some(c)) => {
+                                  // We've got a real range.
+                                  *skip = 2;
+                                  t!("range for '{}-{}'", a, c);
+                                  Some(Some(hir::ClassUnicodeRange::new(a, c)))
+                              }
+                              (Some(a), _, _) => {
+                                  t!("range for '{}'", a);
+                                  Some(Some(hir::ClassUnicodeRange::new(a, a)))
+                              }
+                              (None, _, _) => unreachable!(),
+                          }
+                      }
+                  })
+            .filter_map(|r| r);
+        let mut class = hir::Class::Unicode(hir::ClassUnicode::new(r));
+        if caret {
+            class.negate();
+        }
+        Hir::class(class)
+    }
+}
+
+/// A compiled OpenPGP regular expression for matching UTF-8 encoded
+/// strings.
+///
+/// A `Regex` contains a regular expression compiled according to the
+/// rules defined in [Section 8 of RFC 4880] modulo two differences.
+/// First, the compiler only works on UTF-8 strings (not bytes).
+/// Second, ranges in character classes are between UTF-8 characters,
+/// not just ASCII characters.  Further, by default, strings that
+/// don't pass a sanity check (in particular, include Unicode control
+/// characters) never match.  This behavior can be customized using
+/// [`Regex::disable_sanitizations`].
+///
+///   [Section 8 of RFC 4880]: https://tools.ietf.org/html/rfc4880#section-8
+///   [trust signatures]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+///   [`Regex::disable_sanitizations`]: #method.disable_sanitizations
+///
+/// Regular expressions are used to scope the trust that [trust
+/// signatures] extend.
+///
+/// When working with trust signatures, you'll usually want to use the
+/// [`RegexSet`] data structure, which already implements the correct
+/// semantics.
+///
+///   [`RegexSet`]: struct.RegexSet.html
+///
+/// See the [module-level documentation] for more details.
+///
+///   [module-level documentation]: index.html
+#[derive(Clone, Debug)]
+pub struct Regex {
+    regex: regex::Regex,
+    disable_sanitizations: bool,
+}
+
+impl Regex {
+    /// Parses and compiles the regular expression.
+    ///
+    /// By default, strings that don't pass a sanity check (in
+    /// particular, include Unicode control characters) never match.
+    /// This behavior can be customized using
+    /// [`Regex::disable_sanitizations`].
+    ///
+    ///   [`Regex::disable_sanitizations`]: #method.disable_sanitizations
+    pub fn new(re: &str) -> Result<Self>
+    {
+        let re = re.as_ref();
+        let lexer = Lexer::new(re);
+        let hir = match grammar::RegexParser::new().parse(re, lexer) {
+            Ok(hir) => hir,
+            Err(err) => return Err(parse_error_downcast(err).into()),
+        };
+
+        // Converting the Hir to a string and the compiling that is
+        // apparently the canonical way to convert a Hir to a Regex
+        // (at least it is what rip-grep does, which the author of
+        // regex also wrote.  See
+        // ripgrep/crates/regex/src/config.rs:ConfiguredHir::regex.
+        let regex = regex::RegexBuilder::new(&hir.to_string())
+            .build()?;
+
+        Ok(Self {
+            regex,
+            disable_sanitizations: false,
+        })
+    }
+
+    /// Parses and compiles the regular expression.
+    ///
+    /// Returns an error if `re` is not a valid UTF-8 string.
+    ///
+    /// By default, strings that don't pass a sanity check (in
+    /// particular, include Unicode control characters) never match.
+    /// This behavior can be customized using
+    /// [`Regex::disable_sanitizations`].
+    ///
+    ///   [`Regex::disable_sanitizations`]: #method.disable_sanitizations
+    pub fn from_bytes(re: &[u8]) -> Result<Self> {
+        Self::new(std::str::from_utf8(re)?)
+    }
+
+    /// Controls whether matched strings must pass a sanity check.
+    ///
+    /// If `false` (the default), i.e., sanity checks are enabled, and
+    /// the string doesn't pass the sanity check (in particular, it
+    /// contains a Unicode control character according to
+    /// [`char::is_control`], including newlines and an embedded `NUL`
+    /// byte), this returns `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    pub fn disable_sanitizations(&mut self, disabled: bool) {
+        self.disable_sanitizations = disabled;
+    }
+
+    /// Returns whether the regular expression matches the string.
+    ///
+    /// If sanity checks are enabled (the default) and the string
+    /// doesn't pass the sanity check (in particular, it contains a
+    /// Unicode control character according to [`char::is_control`],
+    /// including newlines and an embedded `NUL` byte), this returns
+    /// `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    pub fn is_match(&self, s: &str) -> bool {
+        if ! self.disable_sanitizations {
+            if s.chars().any(char::is_control) {
+                return false;
+            }
+        }
+
+        self.is_match_clean(s)
+    }
+
+    // is_match, but without the sanity check.
+    fn is_match_clean(&self, s: &str) -> bool {
+        self.regex.is_match(s)
+    }
+
+    /// Returns whether the regular expression matches the User ID.
+    ///
+    /// If the User ID is not a valid UTF-8 string, this returns
+    /// `false`.
+    ///
+    /// If sanity checks are enabled (the default) and the string
+    /// doesn't pass the sanity check (in particular, it contains a
+    /// Unicode control character according to [`char::is_control`],
+    /// including newlines and an embedded `NUL` byte), this returns
+    /// `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    pub fn matches_userid(&self, u: &UserID) -> bool {
+        if let Ok(u) = std::str::from_utf8(u.value()) {
+            self.is_match(u)
+        } else {
+            false
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+enum RegexSet_ {
+    Regex(Regex),
+    Invalid,
+}
+
+/// A set of regular expressions.
+///
+/// A `RegexSet` encapsulates a set of regular expressions.  The
+/// regular expressions are compiled according to the rules defined in
+/// [Section 8 of RFC 4880] modulo two differences.  First, the
+/// compiler only works on UTF-8 strings (not bytes).  Second, ranges
+/// in character classes are between UTF-8 characters, not just ASCII
+/// characters.  Further, by default, strings that don't pass a sanity
+/// check (in particular, include Unicode control characters) never
+/// match.  This behavior can be customized using
+/// [`RegexSet::disable_sanitizations`].
+///
+///   [Section 8 of RFC 4880]: https://tools.ietf.org/html/rfc4880#section-8
+///   [`RegexSet::disable_sanitizations`]: #method.disable_sanitizations
+///
+/// `RegexSet` implements the semantics of [regular expression]s used
+/// in [Trust Signatures].  In particular, a `RegexSet` makes it
+/// easier to deal with trust signatures that:
+///
+///   - Contain multiple Regular Expression subpackts,
+///   - Have no Regular Expression subpackets, and/or
+///   - Include one or more Regular Expression subpackets that are invalid.
+///
+///   [regular expressions]: https://tools.ietf.org/html/rfc4880#section-5.2.3.14
+///   [Trust Signatures]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+///
+/// `RegexSet` compiles each regular expression individually.  If
+/// there are no regular expressions, the `RegexSet` matches
+/// everything.  If a regular expression is invalid, `RegexSet` treats
+/// it as if it doesn't match anything.  Thus, if all regular
+/// expressions are invalid, the `RegexSet` matches nothing (not
+/// everything!).
+///
+/// See the [module-level documentation] for more details.
+///
+///   [module-level documentation]: index.html
+#[derive(Clone, Debug)]
+pub struct RegexSet {
+    re_set: RegexSet_,
+    disable_sanitizations: bool,
+}
+
+impl RegexSet {
+    /// Parses and compiles the regular expressions.
+    ///
+    /// Invalid regular expressions do not cause this to fail.  See
+    /// [`RegexSet`]'s top-level documentation for details.
+    ///
+    ///   [`RegexSet`]: struct.RegexSet.html
+    ///
+    /// By default, strings that don't pass a sanity check (in
+    /// particular, include Unicode control characters) never match.
+    /// This behavior can be customized using
+    /// [`RegexSet::disable_sanitizations`].
+    ///
+    ///   [`RegexSet::disable_sanitizations`]: #method.disable_sanitizations
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use sequoia_openpgp as openpgp;
+    /// use openpgp::regex::RegexSet;
+    ///
+    /// # fn main() -> openpgp::Result<()> {
+    /// // Extract the regex and compile it.
+    /// let res = &[
+    ///     "<[^>]+[@.]example\\.org>$",
+    ///     // Invalid.
+    ///     "[..",
+    /// ];
+    ///
+    /// let res = RegexSet::new(res.iter())?;
+    ///
+    /// assert!(res.is_match("Alice <alice@example.org>"));
+    /// assert!(! res.is_match("Bob <bob@example.com>"));
+    /// # Ok(()) }
+    /// ```
+    pub fn new<'a, RE>(res: impl Iterator<Item=RE>) -> Result<Self>
+        where RE: Borrow<&'a str>
+    {
+        tracer!(TRACE, "RegexSet::new");
+
+        let mut regexes = Vec::with_capacity(2);
+        let mut had_good = false;
+        let mut had_bad = false;
+
+        for re in res {
+            let re = re.borrow();
+            let lexer = Lexer::new(re);
+            match grammar::RegexParser::new().parse(re, lexer) {
+                Ok(hir) => {
+                    had_good = true;
+                    let hir = hir::Hir::group(hir::Group {
+                        kind: hir::GroupKind::NonCapturing,
+                        hir: Box::new(hir),
+                    });
+                    regexes.push(hir);
+                }
+                Err(err) => {
+                    had_bad = true;
+                    t!("Compiling {:?}: {}", re, err);
+                }
+            }
+        }
+
+        if had_bad && ! had_good {
+            t!("All regular expressions were invalid.");
+            Ok(RegexSet {
+                re_set: RegexSet_::Invalid,
+                disable_sanitizations: false,
+            })
+        } else if ! had_bad && ! had_good {
+            // Match everything.
+            t!("No regular expressions provided.");
+            Ok(RegexSet {
+                re_set: RegexSet_::Regex(
+                    Regex {
+                        regex: regex::RegexBuilder::new(
+                            &Hir::empty().to_string())
+                            .build()?,
+                        disable_sanitizations: false,
+                    }),
+                disable_sanitizations: false,
+            })
+        } else {
+            // Match any of the regular expressions.
+            Ok(RegexSet {
+                re_set: RegexSet_::Regex(
+                    Regex {
+                        regex: regex::RegexBuilder::new(
+                            &Hir::alternation(regexes).to_string())
+                            .build()?,
+                        disable_sanitizations: false,
+                    }),
+                disable_sanitizations: false,
+            })
+        }
+    }
+
+    /// Parses and compiles the regular expressions.
+    ///
+    /// The regular expressions are first converted to UTF-8 strings.
+    /// Byte sequences that are not valid UTF-8 strings are considered
+    /// to be invalid regular expressions.  Invalid regular
+    /// expressions do not cause this to fail.  See [`RegexSet`]'s
+    /// top-level documentation for details.
+    ///
+    ///   [`RegexSet`]: struct.RegexSet.html
+    ///
+    /// By default, strings that don't pass a sanity check (in
+    /// particular, include Unicode control characters) never match.
+    /// This behavior can be customized using
+    /// [`RegexSet::disable_sanitizations`].
+    ///
+    ///   [`RegexSet::disable_sanitizations`]: #method.disable_sanitizations
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use sequoia_openpgp as openpgp;
+    /// use openpgp::regex::RegexSet;
+    ///
+    /// # fn main() -> openpgp::Result<()> {
+    /// // A valid and an invalid UTF-8 byte sequence.  The invalid
+    /// // sequence doesn't match anything.  But, that doesn't impact
+    /// // the other regular expressions.
+    /// let res: &[ &[u8] ] = &[
+    ///     &b"<[^>]+[@.]example\\.org>$"[..],
+    ///     // Invalid UTF-8.
+    ///     &b"\xC3\x28"[..],
+    /// ];
+    /// assert!(std::str::from_utf8(res[0]).is_ok());
+    /// assert!(std::str::from_utf8(res[1]).is_err());
+    ///
+    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
+    ///
+    /// assert!(re_set.is_match("Alice <alice@example.org>"));
+    /// assert!(! re_set.is_match("Bob <bob@example.com>"));
+    ///
+    /// // If we only have invalid UTF-8 strings, then nothing
+    /// // matches.
+    /// let res: &[ &[u8] ] = &[
+    ///     // Invalid UTF-8.
+    ///     &b"\xC3\x28"[..],
+    /// ];
+    /// assert!(std::str::from_utf8(res[0]).is_err());
+    ///
+    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
+    ///
+    /// assert!(! re_set.is_match("Alice <alice@example.org>"));
+    /// assert!(! re_set.is_match("Bob <bob@example.com>"));
+    ///
+    ///
+    /// // But, if we have no regular expressions, everything matches.
+    /// let res: &[ &[u8] ] = &[];
+    /// let re_set = RegexSet::from_bytes(res.into_iter())?;
+    ///
+    /// assert!(re_set.is_match("Alice <alice@example.org>"));
+    /// assert!(re_set.is_match("Bob <bob@example.com>"));
+    /// # Ok(()) }
+    /// ```
+    pub fn from_bytes<'a, RE>(res: impl Iterator<Item=RE>) -> Result<Self>
+        where RE: Borrow<&'a [u8]>
+    {
+        let mut have_valid_utf8 = false;
+        let mut have_invalid_utf8 = false;
+        let re_set = Self::new(
+            res
+                .scan((&mut have_valid_utf8, &mut have_invalid_utf8),
+                      |(valid, invalid), re|
+                      {
+                          if let Ok(re) = std::str::from_utf8(re.borrow()) {
+                              **valid = true;
+                              Some(Some(re))
+                          } else {
+                              **invalid = true;
+                              Some(None)
+                          }
+                      })
+                .filter_map(|re| re));
+
+        if !have_valid_utf8 && have_invalid_utf8 {
+            // None of the strings were valid UTF-8.  Reject
+            // everything.
+            Ok(RegexSet {
+                re_set: RegexSet_::Invalid,
+                disable_sanitizations: false,
+            })
+        } else {
+            // We had nothing or at least one string was valid UTF-8.
+            // RegexSet::new did the right thing.
+            re_set
+        }
+    }
+
+    /// Creates a `RegexSet` from the regular expressions stored in a
+    /// trust signature.
+    ///
+    /// This method is a convenience function, which extracts any
+    /// regular expressions from a [Trust Signature] and wraps them in a
+    /// `RegexSet`.
+    ///
+    ///   [Trust Signature]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+    ///
+    /// If the signature is not a valid trust signature (its [type] is
+    /// [GenericCertification], [PersonaCertification],
+    /// [CasualCertification], or [PositiveCertification], and the
+    /// [Trust Signature] subpacket is present), this returns an
+    /// error.
+    ///
+    ///   [type]: https://tools.ietf.org/html/rfc4880#section-5.2.1
+    ///   [GenericCertification]: ../types/enum.SignatureType.html#variant.GenericCertification
+    ///   [PersonaCertification]: ../types/enum.SignatureType.html#variant.PersonaCertification
+    ///   [CasualCertification]: ../types/enum.SignatureType.html#variant.CasualCertification
+    ///   [PositiveCertification]: ../types/enum.SignatureType.html#variant.PositiveCertification
+    ///
+    /// By default, strings that don't pass a sanity check (in
+    /// particular, include Unicode control characters) never match.
+    /// This behavior can be customized using
+    /// [`RegexSet::disable_sanitizations`].
+    ///
+    ///   [`RegexSet::disable_sanitizations`]: #method.disable_sanitizations
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use sequoia_openpgp as openpgp;
+    /// # use openpgp::cert::prelude::*;
+    /// # use openpgp::packet::prelude::*;
+    /// # use openpgp::policy::StandardPolicy;
+    /// use openpgp::regex::RegexSet;
+    /// # use openpgp::types::SignatureType;
+    /// #
+    /// # fn main() -> openpgp::Result<()> {
+    /// # let p = &StandardPolicy::new();
+    /// #
+    /// # let (alice, _)
+    /// #     = CertBuilder::general_purpose(None, Some("Alice <alice@example.org>"))
+    /// #         .generate()?;
+    /// # let mut alices_signer = alice.primary_key().key().clone()
+    /// #     .parts_into_secret()?.into_keypair()?;
+    /// #
+    /// # let (example_com, _)
+    /// #     = CertBuilder::general_purpose(None, Some("OpenPGP CA <openpgp-ca@example.com>"))
+    /// #         .generate()?;
+    /// # let example_com_userid = example_com.with_policy(p, None)?
+    /// #     .userids().nth(0).expect("Added a User ID").userid();
+    /// #
+    /// # let certification = SignatureBuilder::new(SignatureType::GenericCertification)
+    /// #     .set_trust_signature(1, 120)?
+    /// #     .set_regular_expression("<[^>]+[@.]example\\.org>$")?
+    /// #     .add_regular_expression("<[^>]+[@.]example\\.com>$")?
+    /// #     .sign_userid_binding(
+    /// #         &mut alices_signer,
+    /// #         example_com.primary_key().component(),
+    /// #         example_com_userid)?;
+    ///
+    /// // certification is a trust signature, which contains two regular
+    /// // expressions: one that matches all mail addresses for 'example.org'
+    /// // and another that matches all mail addresses for 'example.com'.
+    /// let certification: &Signature = // ...;
+    /// # &certification;
+    ///
+    /// // Extract the regex and compile it.
+    /// let res = RegexSet::from_signature(certification)?;
+    ///
+    /// // Some positive examples.
+    /// assert!(res.is_match("Alice <alice@example.org>"));
+    /// assert!(res.is_match("Bob <bob@example.com>"));
+    ///
+    /// // Wrong domain.
+    /// assert!(! res.is_match("Carol <carol@acme.com>"));
+    ///
+    /// // The standard regex, "<[^>]+[@.]example\\.org>$" only matches
+    /// // email addresses wrapped in <>.
+    /// assert!(! res.is_match("dave@example.com"));
+    ///
+    /// // And, it is case sensitive.
+    /// assert!(res.is_match("Ellen <ellen@example.com>"));
+    /// assert!(! res.is_match("Ellen <ellen@EXAMPLE.COM>"));
+    /// # Ok(()) }
+    /// ```
+    pub fn from_signature(sig: &Signature) -> Result<Self>
+    {
+        use SignatureType::*;
+        match sig.typ() {
+            GenericCertification => (),
+            PersonaCertification => (),
+            CasualCertification => (),
+            PositiveCertification => (),
+            t => return Err(
+                Error::InvalidArgument(
+                    format!(
+                        "Expected a certification signature, found a {}",
+                        t).into())
+                    .into()),
+        }
+
+        if sig.trust_signature().is_none() {
+            return Err(
+                Error::InvalidArgument(
+                    "Expected a trust signature, \
+                     but the signature does not include \
+                     a valid Trust Signature subpacket".into())
+                    .into());
+        }
+
+        Self::from_bytes(sig.regular_expressions())
+    }
+
+    /// Controls whether strings with control characters are allowed.
+    ///
+    /// If `false` (the default), i.e., sanity checks are enabled, and
+    /// the string doesn't pass the sanity check (in particular, it
+    /// contains a Unicode control character according to
+    /// [`char::is_control`], including newlines and an embedded `NUL`
+    /// byte), this returns `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    pub fn disable_sanitizations(&mut self, allowed: bool) {
+        self.disable_sanitizations = allowed;
+        if let RegexSet_::Regex(ref mut re) = self.re_set {
+            re.disable_sanitizations(allowed);
+        }
+    }
+
+    /// Returns whether the regular expression set matches the string.
+    ///
+    /// If sanity checks are enabled (the default) and the string
+    /// doesn't pass the sanity check (in particular, it contains a
+    /// Unicode control character according to [`char::is_control`],
+    /// including newlines and an embedded `NUL` byte), this returns
+    /// `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    ///
+    /// If the `RegexSet` contains one or more regular expressions,
+    /// this method returns whether at least one of the regular
+    /// expressions matches.  Invalid regular expressions never match.
+    ///
+    /// If the `RegexSet` does not contain any regular expressions
+    /// (valid or otherwise), this method returns `true`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use sequoia_openpgp as openpgp;
+    /// use openpgp::regex::RegexSet;
+    ///
+    /// # fn main() -> openpgp::Result<()> {
+    /// // A regular expression that matches anything.  (Note: this is
+    /// // equivalent to providing no regular expressions.)
+    /// let res: &[ &str ] = &[
+    ///     &""[..],
+    /// ];
+    /// let re_set = RegexSet::new(res.into_iter())?;
+    ///
+    /// assert!(re_set.is_match("Alice Lovelace <alice@example.org>"));
+    ///
+    /// // If a User ID has an embedded control character, it doesn't
+    /// // match.
+    /// assert!(! re_set.is_match("Alice <alice@example.org>\0"));
+    /// # Ok(()) }
+    /// ```
+    pub fn is_match(&self, s: &str) -> bool {
+        if ! self.disable_sanitizations {
+            if s.chars().any(char::is_control) {
+                return false;
+            }
+        }
+
+        match self.re_set {
+            RegexSet_::Regex(ref re) =>
+                re.is_match_clean(s),
+            RegexSet_::Invalid =>
+                false,
+        }
+    }
+
+    /// Returns whether the regular expression matches the User ID.
+    ///
+    /// If the User ID is not a valid UTF-8 string, this returns `false`.
+    ///
+    /// If sanity checks are enabled (the default) and the string
+    /// doesn't pass the sanity check (in particular, it contains a
+    /// Unicode control character according to [`char::is_control`],
+    /// including newlines and an embedded `NUL` byte), this returns
+    /// `false`.
+    ///
+    ///   [`char::is_control`]: https://doc.rust-lang.org/std/primitive.char.html#method.is_control
+    ///
+    /// If the `RegexSet` contains one or more regular expressions,
+    /// this method returns whether at least one of the regular
+    /// expressions matches.  Invalid regular expressions never match.
+    ///
+    /// If the `RegexSet` does not contain any regular expressions
+    /// (valid or otherwise), this method returns `true`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use sequoia_openpgp as openpgp;
+    /// use openpgp::packet::UserID;
+    /// use openpgp::regex::RegexSet;
+    ///
+    /// # fn main() -> openpgp::Result<()> {
+    /// // A regular expression that matches anything.  (Note: this is
+    /// // equivalent to providing no regular expressions.)
+    /// let res: &[ &str ] = &[
+    ///     "",
+    /// ];
+    /// let re_set = RegexSet::new(res.into_iter())?;
+    ///
+    /// assert!(re_set.matches_userid(
+    ///     &UserID::from(&b"Alice Lovelace <alice@example.org>"[..])));
+    ///
+    /// // If a User ID is not valid UTF-8, it never matches.
+    /// assert!(! re_set.matches_userid(
+    ///     &UserID::from(&b"Alice \xC3\x28 Lovelace <alice@example.org>"[..])));
+    ///
+    /// // If a User ID has an embedded control character, it doesn't
+    /// // match.
+    /// assert!(! re_set.matches_userid(
+    ///     &UserID::from(&b"Alice <alice@example.org>\0"[..])));
+    /// # Ok(()) }
+    /// ```
+    pub fn matches_userid(&self, u: &UserID) -> bool
+    {
+        let u = u.borrow();
+        if let Ok(u) = std::str::from_utf8(u.value()) {
+            self.is_match(u)
+        } else {
+            false
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn regex() -> Result<()> {
+        fn a(regex: &str, matches: &[(bool, &str)]) {
+            eprint!("{} -> ", regex);
+            let mut compiled = Regex::new(regex).unwrap();
+            compiled.disable_sanitizations(true);
+            eprintln!("{:?}", compiled);
+            for &(matches, text) in matches {
+                assert_eq!(matches, compiled.is_match(text),
+                           "regex: {}\n text: {:?} should{} match",
+                           regex, text, if matches { "" } else { " not" });
+            }
+        }
+        fn f(regex: &str) {
+            eprint!("{} -> ", regex);
+            let compiled = Regex::new(regex);
+            assert!(compiled.is_err());
+            eprintln!("failed (expected)");
+        }
+
+        // Test an important corner case: the + should only apply to
+        // the b!  See: https://github.com/rust-lang/regex/issues/731
+        a("xab+y", &[
+            (true, "xaby"),
+            (true, "xabby"),
+            (false, "xababy"),
+        ]);
+        a("x(ab+)y", &[
+            (false, "xy"),
+            (false, "xay"),
+            (true, "xaby"),
+            (true, "xabby"),
+            (true, "xabbby"),
+            (false, "xababy"),
+        ]);
+        // But here the + matches "ab", not just the "b".
+        a("x(ab)+y", &[
+            (false, "xy"),
+            (true, "xaby"),
+            (false, "xabby"),
+            (true, "xababy"),
+            (true, "xabababy"),
+            (false, "x(ab)y"),
+        ]);
+
+
+
+        a("", &[
+            (true, "s"),
+            (true, "ss"),
+        ]);
+        a("s", &[
+            (true, "s"),
+            (true, "ss"),
+            (false, "a"),
+            (true, "hello, my prettiessss"),
+            (false, "S"),
+        ]);
+        a("ss", &[
+            (false, "s"),
+            (true, "ss"),
+            (true, "sss"),
+            (false, "this has lots of ses, but not two ses together"),
+            (true, "halloss"),
+        ]);
+
+        a("a|b", &[
+            (true, "a"),
+            (true, "b"),
+            (false, "c"),
+            (true, "xxxaxxxbxxx"),
+        ]);
+        a("a|b|c", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (false, "d"),
+            (true, "xxxaxxxbxxx"),
+        ]);
+        // This should match anything.
+        a("|a", &[
+            (true, "a"),
+            (true, "b"),
+        ]);
+        a("a|", &[
+            (true, "a"),
+            (true, "b"),
+        ]);
+        a("|a|b", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+        ]);
+        a("|a|b|c|d", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "d"),
+            (true, "eeee"),
+        ]);
+        a("a|b|", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+        ]);
+        a("a|b|c|", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "d"),
+            (true, "eeee"),
+        ]);
+        a("|", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "d"),
+            (true, "eeee"),
+        ]);
+        a("|a|", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "d"),
+            (true, "eeee"),
+        ]);
+        a("|a|b|", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "d"),
+            (true, "eeee"),
+        ]);
+        // This is: "ab" or "cd", not a followed by b or c followed by d:
+        //
+        //   A regular expression is zero or more branches, separated by '|'.
+        //   ...
+        //   A branch is zero or more pieces, concatenated.
+        //   ...
+        //   A piece is an atom
+        //   ...
+        //   An atom is... a single character.
+        a("ab|cd", &[
+            (true, "abd"),
+            (true, "acd"),
+            (true, "abcd"),
+            (false, "ad"),
+            (false, "b"),
+            (false, "c"),
+            (false, "bb"),
+        ]);
+
+        a("a*", &[
+            (true, ""),
+            (true, "a"),
+            (true, "aa"),
+            (true, "b"),
+        ]);
+        a("xa*y", &[
+            (true, "xy"),
+            (true, "xay"),
+            (true, "xaay"),
+            (false, "y"),
+            (false, "ay"),
+            (false, "aay"),
+            (false, "x y"),
+            (false, "x ay"),
+            (false, "x aay"),
+        ]);
+        f("*");
+
+        a("a+", &[
+            (false, ""),
+            (true, "a"),
+            (true, "aa"),
+            (false, "b"),
+            (true, "baab"),
+            (true, "by ab"),
+            (true, "baa b"),
+        ]);
+        a("ab+", &[
+            (false, ""),
+            (false, "a"),
+            (false, "b"),
+            (true, "ab"),
+            (false, "bb"),
+            (true, "baab"),
+            (true, "by ab"),
+            (false, "baa b"),
+        ]);
+        f("+");
+
+        a("a?", &[
+            (true, ""),
+            (true, "a"),
+            (true, "aa"),
+            (true, "aaa"),
+            (true, "b"),
+            (true, "baab"),
+            (true, "by ab"),
+            (true, "baa b"),
+        ]);
+        a("xa?y", &[
+            (false, ""),
+            (true, "xy"),
+            (false, "a"),
+            (true, "xay"),
+            (false, "aa"),
+            (false, "xaay"),
+            (false, "b"),
+            (false, "bxaayb"),
+            (true, "by xayb"),
+            (true, "baxay b"),
+        ]);
+        f("?");
+
+        f("a*?");
+        a("a*b?c+", &[
+            (false, ""),
+            (true, "c"),
+            (true, "abc"),
+            (true, "aabbcc"),
+            (false, "aab"),
+            (true, "aaaaaabcccccccc"),
+        ]);
+        f("a?*+");
+
+        a("a?|b+", &[
+            (true, ""),
+            (true, "aaa"),
+            (true, "bbb"),
+            (true, "abaa"),
+        ]);
+        a("a+|b+", &[
+            (false, ""),
+            (true, "a"),
+            (true, "aaa"),
+            (true, "b"),
+            (true, "bbb"),
+            (true, "abaa"),
+        ]);
+        a("a+|b+|c+", &[
+            (false, ""),
+            (true, "a"),
+            (true, "aaa"),
+            (true, "b"),
+            (true, "bbb"),
+            (true, "abaa"),
+            (true, "c"),
+            (true, "ccc"),
+            (true, "abaaccc"),
+        ]);
+        a("xa+|b+|c+y", &[
+            (false, ""),
+            (true, "xa"),
+            (true, "xaa"),
+            (true, "b"),
+            (true, "bb"),
+            (true, "cy"),
+            (true, "ccy"),
+
+            (false, "a"),
+            (false, "aaa"),
+            (false, "c"),
+            (false, "ccc"),
+        ]);
+        a("xa+y|sb+u", &[
+            (false, ""),
+            (true, "xay"),
+            (true, "xaay"),
+            (true, "sbu"),
+            (true, "sbbu"),
+            (true, "xysbu"),
+
+            (false, "a"),
+            (false, "aaa"),
+            (false, "xyu"),
+            (false, "ccc"),
+        ]);
+        a("a*|a+|ab+cd+|", &[
+            (true, ""),
+        ]);
+
+        a("()", &[
+            (true, ""),
+            (true, "xyzzy"),
+        ]);
+        a("(())", &[
+            (true, ""),
+            (true, "xyzzy"),
+        ]);
+        a("((()))", &[
+            (true, ""),
+            (true, "xyzzy"),
+        ]);
+        f("((())");
+        f("((())))");
+        a("(a)", &[
+            (true, "a"),
+            (true, "(a)"),
+            (false, "b"),
+        ]);
+        a("x(a)y", &[
+            (false, "xy"),
+            (true, "xay"),
+            (false, "x(a)y"),
+            (true, "(xay)"),
+            (false, "a"),
+            (false, "yax"),
+        ]);
+        a("x(ab)y", &[
+            (false, "xy"),
+            (false, "xay"),
+            (false, "xby"),
+            (true, "xaby"),
+            (false, "x(ab)y"),
+            (true, "(xaby)"),
+        ]);
+        a("x(ab)(cd)y", &[
+            (true, "xabcdy"),
+            (true, "zxabcdyz"),
+        ]);
+        a("a(bc)d(ef)g", &[
+            (true, "abcdefg"),
+            (true, "xabcdefgy"),
+            (false, "xa(bc)d(ef)gy"),
+        ]);
+        a("a((bc))d((ef))g", &[
+            (true, "abcdefg"),
+            (true, "xabcdefgy"),
+            (false, "xa(bc)d(ef)gy"),
+        ]);
+        a("a(b(c)d)e", &[
+            (true, "abcde"),
+            (true, "xabcdey"),
+            (false, "xa(b(c)d)ey"),
+        ]);
+        a("x(a+|b+)y", &[
+            (false, "xy"),
+            (true, "xay"),
+            (true, "xby"),
+            (true, "xaay"),
+            (true, "xbby"),
+            (false, "xaby"),
+            (false, "xaaby"),
+            (false, "xabby"),
+            (false, "xaabby"),
+            (false, "xcy"),
+        ]);
+
+        a(".", &[
+            (false, ""),
+            (true, "a"),
+            (true, "ab"),
+            (true, "ab\nc"),
+            (true, "ab.c"),
+        ]);
+        a("x.y", &[
+            (false, ""),
+            (false, "xy"),
+            (true, "xay"),
+            (true, "x\ny"),
+            (true, "x.y"),
+            (false, "x..y"),
+        ]);
+
+        a("^", &[
+            (true, ""),
+            (true, "xx"),
+        ]);
+        a("^abc", &[
+            (false, ""),
+            (true, "abcdef"),
+            (false, "xabcdef"),
+            (false, "\nabcdef"),
+        ]);
+        a("(^abc|^def)", &[
+            (false, ""),
+            (true, "abcd"),
+            (true, "defg"),
+            (false, "xabcd"),
+            (false, "xdefg"),
+            (false, "^abc"),
+            (false, "^(abc|def)"),
+            (false, "\nabcdef"),
+        ]);
+        a("(^abc|def)", &[
+            (false, ""),
+            (true, "abcd"),
+            (true, "defg"),
+            (false, "xabcd"),
+            (true, "xdefg"),
+            (false, "^abc"),
+            (true, "^(abc|def)"),
+            (false, "\nabcde"),
+        ]);
+        a("^^", &[
+            (true, ""),
+            (true, "abcdef"),
+        ]);
+        a("^abc^", &[
+            (false, ""),
+            (false, "abcdef"),
+            (false, "xabcdef"),
+            (false, "abc\n"),
+            (false, "\nabc\n"),
+            (false, "^abc^"),
+        ]);
+
+        a("$", &[
+            (true, ""),
+            (true, "abc"),
+        ]);
+        a("abc$", &[
+            (false, ""),
+            (true, "abc"),
+            (false, "abcx"),
+            (false, "abc\n"),
+            (false, "abc$"),
+        ]);
+        a("abc$$", &[
+            (false, ""),
+            (true, "abc"),
+            (false, "abcx"),
+            (false, "abc\n"),
+            (false, "abc$"),
+        ]);
+        a("(abc$)x", &[
+            (false, ""),
+            (false, "abc"),
+            (false, "abcx"),
+            (false, "abc\nx"),
+            (false, "abc$x"),
+        ]);
+        a("abc$|def$", &[
+            (false, ""),
+            (true, "abc"),
+            (false, "abcx"),
+            (false, "abc\n"),
+            (false, "abc$"),
+            (true, "def"),
+            (false, "defx"),
+            (false, "def\n"),
+            (false, "def$"),
+            (true, "abcdef"),
+        ]);
+
+        a("\\|", &[
+            (true, "|"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\*", &[
+            (true, "*"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\+", &[
+            (true, "+"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\?", &[
+            (true, "?"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\.", &[
+            (true, "."),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\^", &[
+            (true, "^"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\$", &[
+            (true, "$"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\\\", &[
+            (true, "\\"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\[", &[
+            (true, "["),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\]", &[
+            (true, "]"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        a("\\-", &[
+            (true, "-"),
+            (false, ""),
+            (false, "a"),
+        ]);
+        f("\\");
+
+        a("[a]", &[
+            (true, "a"),
+            (false, "b"),
+        ]);
+        a("[abc]", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (false, "d"),
+        ]);
+        a("[a-c]", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (false, "d"),
+        ]);
+        a("[xa-c]", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "x"),
+            (false, "d"),
+        ]);
+        a("[a-cxyz]", &[
+            (true, "a"),
+            (true, "b"),
+            (true, "c"),
+            (true, "x"),
+            (false, "d"),
+        ]);
+        a("[a-c]x", &[
+