summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal H. Walfield <neal@pep.foundation>2021-01-08 14:29:22 +0100
committerNeal H. Walfield <neal@pep.foundation>2021-01-08 14:35:01 +0100
commit3367828e487fa79d2ec0a615e6b15dd5fbee5c9a (patch)
treeeeb9f0652345a5f3a7b73609fb2c23475a2b71a9
parent17253b2e7fa297426a841042fa955ca69955f00f (diff)
openpgp: Add regex support.
- Fixes #188.
-rw-r--r--Cargo.lock1
-rw-r--r--openpgp/Cargo.toml1
-rw-r--r--openpgp/src/lib.rs1
-rw-r--r--openpgp/src/regex/grammar.lalrpop204
-rw-r--r--openpgp/src/regex/lexer.rs222
-rw-r--r--openpgp/src/regex/mod.rs1943
6 files changed, 2372 insertions, 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 4bbfe62d..9d5a238f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2027,6 +2027,7 @@ dependencies = [
"quickcheck",
"rand",
"regex",
+ "regex-syntax",
"rpassword",
"sha1collisiondetection",
"thiserror",
diff --git a/openpgp/Cargo.toml b/openpgp/Cargo.toml
index a56ce401..35827295 100644
--- a/openpgp/Cargo.toml
+++ b/openpgp/Cargo.toml
@@ -38,6 +38,7 @@ libc = "0.2.66"
memsec = { version = ">=0.5", default-features = false }
nettle = { version = "7", optional = true }
regex = "1"
+regex-syntax = "0.6"
sha1collisiondetection = { version = "0.2.3", default-features = false, features = ["std"] }
thiserror = "1.0.2"
backtrace = "0.3.3"
diff --git a/openpgp/src/lib.rs b/openpgp/src/lib.rs
index 7670f736..da44fb4c 100644
--- a/openpgp/src/lib.rs
+++ b/openpgp/src/lib.rs
@@ -159,6 +159,7 @@ mod keyid;
pub use keyid::KeyID;
mod keyhandle;
pub use keyhandle::KeyHandle;
+pub mod regex;
pub mod policy;
pub(crate) mod seal;
diff --git a/openpgp/src/regex/grammar.lalrpop b/openpgp/src/regex/grammar.lalrpop
new file mode 100644
index 00000000..e9e619b5
--- /dev/null
+++ b/openpgp/src/regex/grammar.lalrpop
@@ -0,0 +1,204 @@
+// -*- mode: Rust; -*-
+
+use super::generate_class;
+use super::lexer;
+use super::lexer::{Token, LexicalError};
+use regex_syntax::hir::{self, Hir};
+
+// Pass in the original, untokenized input to facilitate error
+// recovery.
+grammar<'input>(input: &'input str);
+
+// This is a straightforward translation of the regular expression
+// grammar from section 8 of RFC 4880.
+//
+// https://tools.ietf.org/html/rfc4880#section-8
+pub(crate) Regex : Hir = {
+ <l:LBranch> <r:RBranch*> => {
+ let mut r = r;
+ r.insert(0, l);
+ Hir::alternation(r)
+ },
+}
+
+LBranch : Hir = {
+ Branch,
+}
+
+RBranch : Hir = {
+ PIPE <Branch>,
+}
+
+Branch : Hir = {
+ <p:Piece*> => {
+ hir::Hir::group(hir::Group {
+ kind: hir::GroupKind::NonCapturing,
+ hir: Box::new(hir::Hir::concat(p)),
+ })
+ },
+}
+
+Piece : Hir = {
+ <a:Atom> => a,
+ <a:Atom> STAR => {
+ hir::Hir::repetition(hir::Repetition {
+ kind: hir::RepetitionKind::ZeroOrMore,
+ greedy: true,
+ hir: Box::new(a)
+ })
+ },
+ <a:Atom> PLUS => {
+ hir::Hir::repetition(hir::Repetition {
+ kind: hir::RepetitionKind::OneOrMore,
+ greedy: true,
+ hir: Box::new(a)
+ })
+ },
+ <a:Atom> QUESTION => {
+ hir::Hir::repetition(hir::Repetition {
+ kind: hir::RepetitionKind::ZeroOrOne,
+ greedy: true,
+ hir: Box::new(a)
+ })
+ },
+}
+
+Atom : Hir = {
+ LPAREN <r:Regex> RPAREN => {
+ hir::Hir::group(hir::Group {
+ kind: hir::GroupKind::NonCapturing,
+ hir: Box::new(r),
+ })
+ },
+
+ Range,
+
+ DOT => {
+ hir::Hir::any(false)
+ },
+ CARET => {
+ hir::Hir::anchor(hir::Anchor::StartText)
+ },
+ DOLLAR => {
+ hir::Hir::anchor(hir::Anchor::EndText)
+ },
+
+ BACKSLASH <t:AnyChar> => {
+ hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+ },
+
+ <t:OTHER> => {
+ hir::Hir::literal(hir::Literal::Unicode(t.to_char()))
+ },
+
+}
+
+Range : Hir = {
+ LBRACKET <c:CARET?> <class1:RBRACKET> <class2:NotRBracket*> RBRACKET => {
+ generate_class(c.is_some(),
+ std::iter::once(class1.to_char())
+ .chain(class2.into_iter().map(|t| t.to_char())))
+ },
+ LBRACKET CARET <class:NotRBracket+> RBRACKET => {
+ generate_class(true,
+ class.into_iter().map(|t| t.to_char()))
+ },
+ LBRACKET <class1:NotCaretNotRBracket> <class2:NotRBracket*> RBRACKET => {
+ generate_class(false,
+ std::iter::once(class1.to_char())
+ .chain(class2.into_iter().map(|t| t.to_char())))
+ },
+}
+
+NotRBracket : Token = {
+ PIPE => Token::OTHER('|'),
+
+ STAR => Token::OTHER('*'),
+ PLUS => Token::OTHER('+'),
+ QUESTION => Token::OTHER('?'),
+
+ LPAREN => Token::OTHER('('),
+ RPAREN => Token::OTHER(')'),
+
+ DOT => Token::OTHER('.'),
+ CARET => Token::OTHER('^'),
+ DOLLAR => Token::OTHER('$'),
+ BACKSLASH => Token::OTHER('\\'),
+
+ LBRACKET => Token::OTHER('['),
+ // RBRACKET => Token::OTHER(']'),
+ DASH => Token::OTHER('-'),
+
+ OTHER,
+}
+
+NotCaretNotRBracket : Token = {
+ PIPE => Token::OTHER('|'),
+
+ STAR => Token::OTHER('*'),
+ PLUS => Token::OTHER('+'),
+ QUESTION => Token::OTHER('?'),
+
+ LPAREN => Token::OTHER('('),
+ RPAREN => Token::OTHER(')'),
+
+ DOT => Token::OTHER('.'),
+ // CARET => Token::OTHER('^'),
+ DOLLAR => Token::OTHER('$'),
+ BACKSLASH => Token::OTHER('\\'),
+
+ LBRACKET => Token::OTHER('['),
+ // RBRACKET => Token::OTHER(']'),
+ DASH => Token::OTHER('-'),
+
+ OTHER,
+}
+
+AnyChar : Token = {
+ PIPE => Token::OTHER('|'),
+
+ STAR => Token::OTHER('*'),
+ PLUS => Token::OTHER('+'),
+ QUESTION => Token::OTHER('?'),
+
+ LPAREN => Token::OTHER('('),
+ RPAREN => Token::OTHER(')'),
+
+ DOT => Token::OTHER('.'),
+ CARET => Token::OTHER('^'),
+ DOLLAR => Token::OTHER('$'),
+ BACKSLASH => Token::OTHER('\\'),
+
+ LBRACKET => Token::OTHER('['),
+ RBRACKET => Token::OTHER(']'),
+ DASH => Token::OTHER('-'),
+
+ OTHER,
+}
+
+extern {
+ type Location = usize;
+ type Error = LexicalError;
+
+ enum lexer::Token {
+ PIPE => lexer::Token::PIPE,
+
+ STAR => lexer::Token::STAR,
+ PLUS => lexer::Token::PLUS,
+ QUESTION => lexer::Token::QUESTION,
+
+ LPAREN => lexer::Token::LPAREN,
+ RPAREN => lexer::Token::RPAREN,
+
+ DOT => lexer::Token::DOT,
+ CARET => lexer::Token::CARET,
+ DOLLAR => lexer::Token::DOLLAR,
+ BACKSLASH => lexer::Token::BACKSLASH,
+
+ LBRACKET => lexer::Token::LBRACKET,
+ RBRACKET => lexer::Token::RBRACKET,
+ DASH => lexer::Token::DASH,
+
+ OTHER => lexer::Token::OTHER(_),
+ }
+}
diff --git a/openpgp/src/regex/lexer.rs b/openpgp/src/regex/lexer.rs
new file mode 100644
index 00000000..18300d90
--- /dev/null
+++ b/openpgp/src/regex/lexer.rs
@@ -0,0 +1,222 @@
+use std::fmt;
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum LexicalError {
+}
+
+impl fmt::Display for LexicalError {
+ // This trait requires `fmt` with this exact signature.
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ write!(f, "{}", "{}")
+ }
+}
+
+pub type Spanned<Token, Loc, LexicalError>
+ = Result<(Loc, Token, Loc), LexicalError>;
+
+// The type of the parser's input.
+//
+// The parser iterates over tuples consisting of the token's starting
+// position, the token itself, and the token's ending position.
+pub(crate) type LexerItem<Token, Loc, LexicalError>
+ = Spanned<Token, Loc, LexicalError>;
+
+/// The tokens of an OpenPGP regular expression.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token {
+ PIPE,
+
+ STAR,
+ PLUS,
+ QUESTION,
+
+ LPAREN,
+ RPAREN,
+
+ DOT,
+ CARET,
+ DOLLAR,
+ BACKSLASH,
+
+ LBRACKET,
+ RBRACKET,
+ DASH,
+
+ OTHER(char),
+}
+assert_send_and_sync!(Token);
+
+impl fmt::Display for Token {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ f.write_str(&format!("{:?}", self)[..])
+ }
+}
+
+impl From<Token> for String {
+ fn from(t: Token) -> String {
+ use self::Token::*;
+ match t {
+ PIPE => '|'.to_string(),
+ STAR => '*'.to_string(),
+ PLUS => '+'.to_string(),
+ QUESTION => '?'.to_string(),
+ LPAREN => '('.to_string(),
+ RPAREN => ')'.to_string(),
+ DOT => '.'.to_string(),
+ CARET => '^'.to_string(),
+ DOLLAR => '$'.to_string(),
+ BACKSLASH => '\\'.to_string(),
+ LBRACKET => '['.to_string(),
+ RBRACKET => ']'.to_string(),
+ DASH => '-'.to_string(),
+ OTHER(c) => c.to_string(),
+ }
+ }
+}
+
+impl Token {
+ pub fn to_string(self) -> String {
+ self.into()
+ }
+
+ pub fn to_char(&self) -> char {
+ use self::Token::*;
+ match self {
+ PIPE => '|',
+ STAR => '*',
+ PLUS => '+',
+ QUESTION => '?',
+ LPAREN => '(',
+ RPAREN => ')',
+ DOT => '.',
+ CARET => '^',
+ DOLLAR => '$',
+ BACKSLASH => '\\',
+ LBRACKET => '[',
+ RBRACKET => ']',
+ DASH => '-',
+ OTHER(c) => *c,
+ }
+ }
+}
+
+pub(crate) struct Lexer<'input> {
+ offset: usize,
+ input: &'input str,
+}
+
+impl<'input> Lexer<'input> {
+ pub fn new(input: &'input str) -> Self {
+ Lexer { offset: 0, input }
+ }
+}
+
+impl<'input> Iterator for Lexer<'input> {
+ type Item = LexerItem<Token, usize, LexicalError>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ use self::Token::*;
+
+ tracer!(super::TRACE, "regex::Lexer::next");
+
+ // Returns the length of the first character in s in bytes.
+ // If s is empty, returns 0.
+ fn char_bytes(s: &str) -> usize {
+ if let Some(c) = s.chars().next() {
+ c.len_utf8()
+ } else {
+ 0
+ }
+ }
+
+ let one = |input: &'input str| -> Option<Token> {
+ let c = input.chars().next()?;
+ Some(match c {
+ '|' => PIPE,
+ '*' => STAR,
+ '+' => PLUS,
+ '?' => QUESTION,
+ '(' => LPAREN,
+ ')' => RPAREN,
+ '.' => DOT,
+ '^' => CARET,
+ '$' => DOLLAR,
+ '\\' => BACKSLASH,
+ '[' => LBRACKET,
+ ']' => RBRACKET,
+ '-' => DASH,
+ _ => OTHER(c),
+ })
+ };
+
+ let l = char_bytes(self.input);
+ let t = match one(self.input) {
+ Some(t) => t,
+ None => return None,
+ };
+
+ self.input = &self.input[l..];
+
+ let start = self.offset;
+ let end = start + l;
+ self.offset += l;
+
+ t!("Returning token at offset {}: '{:?}'",
+ start, t);
+
+ Some(Ok((start, t, end)))
+ }
+}
+
+impl<'input> From<&'input str> for Lexer<'input> {
+ fn from(i: &'input str) -> Lexer<'input> {
+ Lexer::new(i)
+ }
+}
+
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn lexer() {
+ fn lex(s: &str, expected: &[Token]) {
+ let tokens: Vec<Token> = Lexer::new(s)
+ .map(|t| t.unwrap().1)
+ .collect();
+
+ assert_eq!(&tokens[..], expected,
+ "{}", s);
+ }
+
+ use Token::*;
+ lex("|", &[ PIPE ]);
+ lex("*", &[ STAR ]);
+ lex("+", &[ PLUS ]);
+ lex("?", &[ QUESTION ]);
+ lex("(", &[ LPAREN ]);
+ lex(")", &[ RPAREN ]);
+ lex(".", &[ DOT ]);
+ lex("^", &[ CARET ]);
+ lex("$", &[ DOLLAR ]);
+ lex("\\", &[ BACKSLASH ]);
+ lex("[", &[ LBRACKET ]);
+ lex("]", &[ RBRACKET ]);
+ lex("-", &[ DASH ]);
+ lex("a", &[ OTHER('a') ]);
+ lex("aa", &[ OTHER('a'), OTHER('a') ]);
+ lex("foo", &[ OTHER('f'), OTHER('o'), OTHER('o') ]);
+
+ lex("foo\\bar", &[ OTHER('f'), OTHER('o'), OTHER('o'),
+ BACKSLASH,
+ OTHER('b'), OTHER('a'), OTHER('r') ]);
+ lex("*?!", &[ STAR, QUESTION, OTHER('!') ]);
+
+ // Multi-byte UTF-8.
+ lex("ΓŸβ„πŸ’£", &[ OTHER('ß'), OTHER('ℝ'), OTHER('πŸ’£'), ]);
+ lex("(ß|ℝ|πŸ’£",
+ &[ LPAREN, OTHER('ß'), PIPE, OTHER('ℝ'), PIPE, OTHER('πŸ’£') ]);
+ lex("東京", &[ OTHER('東'), OTHER('京') ]);
+ }
+}
diff --git a/openpgp/src/regex/mod.rs b/openpgp/src/regex/mod.rs
new file mode 100644
index 00000000..626e5190
--- /dev/null
+++ b/openpgp/src/regex/mod.rs
@@ -0,0 +1,1943 @@
+//! OpenPGP regex parser.
+//!
+//! OpenPGP defines a [regular expression language]. It is used with
+//! [trust signatures] to scope the trust that they extend.
+//!
+//! [regular expression language]: https://tools.ietf.org/html/rfc4880#section-8
+//! [trust signatures]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+//!
+//! Compared with most regular expression languages, OpenPGP's is
+//! quite simple. In particular, it only includes the following
+//! features:
+//!
+//! - Alternations using `|`,
+//! - Grouping using `(` and `)`,
+//! - The `*`, `+`, and `?` glob operators,
+//! - The `^`, and `$` anchors,
+//! - The '.' operator, positive *non-empty* ranges
+//! (e.g. `[a-zA-Z]`) and negative *non-empty* ranges (`[^@]`), and
+//! - The backslash operator to escape special characters (except
+//! in ranges).
+//!
+//! The regular expression engine defined in this module implements
+//! that language with two differences. The first difference is that
+//! the compiler only works on UTF-8 strings (not bytes). The second
+//! difference is that ranges in character classes are between UTF-8
+//! characters, not just ASCII characters.
+//!
+//! # Data Structures
+//!
+//! This module defines two data structures. [`Regex`] encapsulates a
+//! valid regular expression, and provides methods to check whether
+//! the regular expression matches a string or a [`UserID`].
+//! [`RegexSet`] is similar, but encapsulates zero or more regular
+//! expressions, which may or may not be valid. Its match methods
+//! return `true` if there are no regular expressions, or, if there is
+//! at least one regular expression, they return whether at least one
+//! of the regular expressions matches it. `RegexSet`'s matcher
+//! handles invalid regular expressions by considering them to be
+//! regular expressions that don't match anything. These semantics
+//! are consistent with a trust signature's scoping rules. Further,
+//! strings that contain control characters never match. This
+//! behavior can be overridden using [`Regex::disable_sanitizations`]
+//! and [`RegexSet::disable_sanitizations`].
+//!
+//! [`Regex`]: struct.Regex.html
+//! [`UserID`]: ../packet/struct.UserID.html
+//! [`RegexSet`]: struct.RegexSet.html
+//! [`Regex::disable_sanitizations`]: struct.Regex.html#method.disable_sanitizations
+//! [`RegexSet::disable_sanitizations`]: struct.RegexSet.html#method.disable_sanitizations
+//!
+//! # Scoped Trust Signatures
+//!
+//! To create a trust signature, you create a signature whose [type]
+//! is either [GenericCertification], [PersonaCertification],
+//! [CasualCertification], or [PositiveCertification], and add a
+//! [Trust Signature] subpacket using, for instance, the
+//! [`SignatureBuilder::set_trust_signature`] method.
+//!
+//! [type]: https://tools.ietf.org/html/rfc4880#section-5.2.1
+//! [GenericCertification]: ../types/enum.SignatureType.html#variant.GenericCertification
+//! [PersonaCertification]: ../types/enum.SignatureType.html#variant.PersonaCertification
+//! [CasualCertification]: ../types/enum.SignatureType.html#variant.CasualCertification
+//! [PositiveCertification]: ../types/enum.SignatureType.html#variant.PositiveCertification
+//! [Trust Signature]: https://tools.ietf.org/html/rfc4880#section-5.2.3.13
+//! [`SignatureBuilder::set_trust_signature`]: ../packet/signature/struct.SignatureBuilder.html#method.set_trust_signature
+//!
+//! To scope a trust signature, you add a [Regular Expression
+//! subpacket] to it using
+//! [`SignatureBuilder::set_regular_expression`] or
+//! [`SignatureBuilder::add_regular_expression`].
+//!
+//! To extract any regular expressions, you can use
+//! [`SignatureBuilder::regular_expressions`].
+//!
+//! [Regular Expression subpacket]: https://tools.ietf.org/html/rfc4880#section-5.2.3.14
+//! [`SignatureBuilder::set_regular_expression`]: ../packet/signature/struct.SignatureBuilder.html#method.set_regular_expression
+//! [`SignatureBuilder::add_regular_expression`]: ../packet/signature/struct.SignatureBuilder.html#method.add_regular_expression
+//! [`SignatureBuilder::regular_expressions`]: ../packet/signature/struct.SignatureBuilder.html#method.regular_expressions
+//!
+//! # Caveat Emptor
+//!
+//! Note: GnuPG has [very limited regular expression support]. In
+//! particular, it only recognizes regular expressions with the
+//! following form:
+//!
+//! [very limited regular expression support]: https://dev.gnupg.org/source/gnupg/browse/master/g10/trustdb.c;15e065dee891eef9545556f210b4199107999869$1558
+//!
+//! ```text
+//! <[^>]+[@.]example\.com>$
+//! ```
+//!
+//! Further, it escapes any operators between the `<[^>]+[@.]` and the
+//! `>$` except `.` and `\`. Otherwise, GnuPG treats the regular
+//! expression as a literal domain (e.g., `example.com`).
+//!
+//! Further, until [version 2.2.22] (released in August 2020), GnuPG
+//! did not support regular expressions on Windows, and other systems
+//! that don't include `regcomp`. On these systems, if a trust
+//! signature included a regular expression, GnuPG conservatively
+//! considered the whole trust signature to match nothing.
+//!
+//! [version 2.2.22]: https://dev.gnupg.org/T5030
+//!
+//! # Examples
+//!
+//! A CA signs two certificates, one for Alice, who works at
+//! `example.com`, and one for Bob, who is associated with `some.org`.
+//! Carol then creates a trust signature for the CA, which she scopes
+//! to `example.org` and `example.com`. We then confirm that Carol
+//! can use the CA to authenticate Alice, but not Bob.
+//!
+//! ```
+//! use sequoia_openpgp as openpgp;
+//! use openpgp::cert::prelude::*;
+//! use openpgp::packet::prelude::*;
+//! use openpgp::policy::StandardPolicy;
+//! use openpgp::regex::RegexSet;
+//! use openpgp::types::SignatureType;
+//!
+//! # fn main() -> openpgp::Result<()> {
+//! let p = &StandardPolicy::new();
+//!
+//! let (ca, _)
+//! = CertBuilder::general_purpose(None, Some("OpenPGP CA <openpgp-ca@example.com>"))
+//! .generate()?;
+//! let mut ca_signer = ca.primary_key().key().clone()
+//! .parts_into_secret()?.into_keypair()?;
+//! let ca_userid = ca.with_policy(p, None)?
+//! .userids().nth(0).expect("Added a User ID").userid();
+//!
+//! // The CA certifies "Alice <alice@example.com>".
+//! let (alice, _)
+//! = CertBuilder::general_purpose(None, Some("Alice <alice@example.com>"))
+//! .generate()?;
+//! let alice_userid = alice.with_policy(p, None)?
+//! .userids().nth(0).expect("Added a User ID").userid();
+//! let alice_certification = SignatureBuilder::new(SignatureType::GenericCertification)
+//! .sign_userid_binding(
+//! &mut ca_signer,
+//! alice.primary_key().component(),
+//! alice_userid)?;
+//! let alice = alice.insert_packets(alice_certification.clone())?;
+//! # assert!(alice.clone().into_packets().any(|p| {
+//! # match p {
+//! # Packet::Signature(sig) => sig == alice_certification,
+//! # _ => false,
+//! # }
+//! # }));
+//!
+//! // The CA certifies "Bob <bob@some.org>".
+//! let (bob, _)
+//! = CertBuilder::general_purpose(None, Some("Bob <bob@some.org>"))
+//! .generate()?;
+//! let bob_userid = bob.with_policy(p, None)?
+//! .userids().nth(0).expect("Added a User ID").userid();
+//! let bob_certification = SignatureBuilder::new(SignatureType::GenericCertification)
+//! .sign_userid_binding(
+//! &mut ca_signer,
+//! bob.primary_key().component(),
+//! bob_userid)?;
+//! let bob = bob.insert_packets(bob_certification.clone())?;
+//! # assert!(bob.clone().into_packets().any(|p| {
+//! # match p {
+//! # Packet::Signature(sig) => sig == bob_certification,
+//! # _ => false,
+//! # }
+//! # }));
+//!
+//!
+//! // Carol tsigns the CA's certificate.
+//! let (carol, _)
+//! = CertBuilder::general_purpose(None, Some("Carol <carol@another.net>"))
+//! .generate()?;
+//! let mut carol_signer = carol.primary_key().key().clone()
+//! .parts_into_secret()?.into_keypair()?;
+//!
+//! let ca_tsig = SignatureBuilder::new(SignatureType::GenericCertification)
+//! .set_trust_signature(2, 120)?
+//! .set_regular_expression("<[^>]+[@.]example\\.org>$")?
+//! .add_regular_expression("<[^>]+[@.]example\\.com>$")?
+//! .sign_userid_binding(
+//! &mut carol_signer,
+//! ca.primary_key().component(),
+//! ca_userid)?;
+//! let ca = ca.insert_packets(ca_tsig.clone())?;
+//! # assert!(ca.clone().into_packets().any(|p| {
+//! # match p {
+//! # Packet::Signature(sig) => sig == ca_tsig,
+//! # _ => false,
+//! # }
+//! # }));
+//!
+//!
+//! // Carol now tries to authenticate Alice and Bob's certificates
+//! // using the CA as a trusted introducer based on `ca_tsig`.
+//! let res = RegexSet::from_signature(&ca_tsig)?;
+//!
+//! // We should be able to authenticate Alice.
+//! let alice_ua = alice.with_policy(p, None)?
+//! .userids().nth(0).expect("Added a User ID");
+//! # assert!(res.matches_userid(&alice_ua));
+//! let mut authenticated = false;
+//! for c in alice_ua.certifications() {
+//! if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
+//! if c.clone().verify_userid_binding(
+//! ca.primary_key().key(),
+//! alice.primary_key().key(),
+//! alice_ua.userid()).is_ok()
+//! {
+//! authenticated |= res.matches_userid(&alice_ua);
+//! }
+//! }
+//! }
+//! assert!(authenticated);
+//!
+//! // But, although the CA has certified Bob's key, Carol doesn't rely
+//! // on it, because Bob's email address ("bob@some.org") is out of
+//! // scope (some.org, not example.com).
+//! let bob_ua = bob.with_policy(p, None)?
+//! .userids().nth(0).expect("Added a User ID");
+//! # assert!(! res.matches_userid(&bob_ua));
+//! let mut have_certification = false;
+//! let mut authenticated = false;
+//! for c in bob_ua.certifications() {
+//! if c.get_issuers().into_iter().any(|h| h.aliases(ca.key_handle())) {
+//! if c.clone().verify_userid_binding(
+//! ca.primary_key().key(),
+//! bob.primary_key().key(),
+//! bob_ua.userid()).is_ok()
+//! {
+//! have_certification = true;
+//! authenticated |= res.matches_userid(&bob_ua);
+//! }
+//! }
+//! }
+//! assert!(have_certification);
+//! assert!(! authenticated);
+//! # Ok(()) }
+//! ```
+
+use std::borrow::Borrow;
+
+use lalrpop_util::ParseError;
+use regex_syntax::hir::{self, Hir};
+use regex;
+
+use crate::Error;
+use crate::Result;
+use crate::packet::prelude::*;
+use crate::types::SignatureType;
+
+pub(crate) mod lexer;
+lalrpop_util::lalrpop_mod!(
+ #[allow(clippy::all)]
+ #[allow(unused_parens)]
+ grammar,
+ "/regex/grammar.rs"
+);
+
+pub(crate) use self::lexer::Token;
+pub(crate) use self::lexer::{Lexer, LexicalError};
+
+const TRACE: bool = false;
+
+// Convert tokens into strings.
+//
+// Unfortunately, we can't implement From, because we don't define
+// ParseError in this crate.
+pub(crate) fn parse_error_downcast(e: ParseError<usize, Token, LexicalError>)
+ -> ParseError<usize, String, LexicalError>
+{
+ match e {
+ ParseError::UnrecognizedToken {
+ token: (start, t, end),
+ expected,
+ } => ParseError::UnrecognizedToken {
+ token: (start, t.into(), end),
+ expected,
+ },
+
+ ParseError::ExtraToken {
+ token: (start, t, end),
+ } => ParseError::ExtraToken {
+ token: (start, t.into(), end),
+ },
+
+ ParseError::InvalidToken { location }
+ => ParseError::InvalidToken { location },
+
+ ParseError::User { error }
+ => ParseError::User { error },
+
+ ParseError::UnrecognizedEOF { location, expected }
+ => ParseError::UnrecognizedEOF { location, expected },
+ }
+}
+
+// Used by grammar.lalrpop to generate a regex class (e.g. '[a-ce]').
+fn generate_class(caret: bool, chars: impl Iterator<Item=char>) -> Hir
+{
+ tracer!(TRACE, "generate_class");
+
+ // Dealing with ranges is a bit tricky. We need to examine three
+ // tokens. If the middle one is a dash, it's a range.
+
+ let chars: Vec<Option<char>> = chars
+ // Pad it out so what we can use windows to get three
+ // characters at a time, and be sure to process all
+ // characters.
+ .map(|c| Some(c))
+ .chain(std::iter::once(None))
+ .chain(std::iter::once(None))
+ .collect();
+ if chars.len() == 2 {
+ // The grammar doesn't allow an empty class.
+ unreachable!();
+ } else {
+ let r = chars
+ .windows(3)
+ .scan(0,
+ |skip: &mut usize, x: &[Option<char>]|
+ // Scan stops if the result is None.
+ // filter_map keeps only those elements that
+ // are Some.
+ -> Option<Option<hir::ClassUnicodeRange>>
+ {
+ if *skip > 0 {
+ *skip -= 1;
+ t!("Skipping: {:?} (skip now: {})", x, skip);
+ Some(None)
+ } else {
+ match (x[0], x[1], x[2]) {
+ (Some(a), Some('-'), Some(c)) => {
+ // We've got a real range.
+ *skip = 2;
+ t!("range for '{}-{}'", a, c);
+ Some(Some(hir::ClassUnicodeRange::new(a, c)))
+ }
+ (Some(a), _, _) => {
+ t!("range for '{}'", a);
+ Some(Some(hir::ClassUnicodeRange::new(a, a)))</