summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeal H. Walfield <neal@pep.foundation>2019-04-11 23:31:18 +0200
committerNeal H. Walfield <neal@pep.foundation>2019-04-12 11:39:37 +0200
commit5c7e4274102748b287e81ffb195a918f442e2e13 (patch)
tree60487c5edc7af1e1fc2a1e62db03b760443549c9
parente3b30cebc5fabcd9abc1b4bd109c4816e06a2f01 (diff)
New crate sequoia-rfc2822.
- An RFC 2822 mail name-addr parser.
-rw-r--r--Cargo.toml1
-rw-r--r--rfc2822/Cargo.toml27
-rw-r--r--rfc2822/README.md44
-rw-r--r--rfc2822/build.rs11
-rw-r--r--rfc2822/src/component.rs212
-rw-r--r--rfc2822/src/grammar.lalrpop698
-rw-r--r--rfc2822/src/grammar.rs2
-rw-r--r--rfc2822/src/lexer.rs212
-rw-r--r--rfc2822/src/lib.rs898
-rw-r--r--rfc2822/src/macros.rs19
-rw-r--r--rfc2822/src/strings.rs57
-rw-r--r--rfc2822/src/trace.rs63
12 files changed, 2244 insertions, 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
index a574d220..e4267002 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,6 +23,7 @@ maintenance = { status = "actively-developed" }
[dependencies]
buffered-reader = { path = "buffered-reader", version = "0.5" }
+sequoia-rfc2822 = { path = "rfc2822", version = "0.1" }
sequoia-openpgp = { path = "openpgp", version = "0.5" }
sequoia-openpgp-ffi = { path = "openpgp-ffi", version = "0.5" }
sequoia-core = { path = "core", version = "0.5" }
diff --git a/rfc2822/Cargo.toml b/rfc2822/Cargo.toml
new file mode 100644
index 00000000..82888ba8
--- /dev/null
+++ b/rfc2822/Cargo.toml
@@ -0,0 +1,27 @@
+[package]
+name = "sequoia-rfc2822"
+description = "An RFC 2822 name-addr parser"
+version = "0.1.0"
+authors = [
+ "Justus Winter <justus@sequoia-pgp.org>",
+ "Neal H. Walfield <neal@sequoia-pgp.org>",
+]
+build = "build.rs"
+documentation = "https://docs.sequoia-pgp.org/0.1.0/rfc2822"
+homepage = "https://sequoia-pgp.org/"
+repository = "https://gitlab.com/sequoia-pgp/sequoia"
+readme = "README.md"
+license = "GPL-3.0"
+keywords = ["rfc2822", "rfc822", "name-addr", "email"]
+categories = ["email"]
+
+[badges]
+gitlab = { repository = "sequoia-pgp/sequoia" }
+maintenance = { status = "actively-developed" }
+
+[dependencies]
+failure = "0.1.2"
+lalrpop-util = "0.16"
+
+[build-dependencies]
+lalrpop = "0.16"
diff --git a/rfc2822/README.md b/rfc2822/README.md
new file mode 100644
index 00000000..db6140a8
--- /dev/null
+++ b/rfc2822/README.md
@@ -0,0 +1,44 @@
+An [RFC 2822] parser.
+
+ [RFC 2822]: https://tools.ietf.org/html/rfc2822
+
+Currently, this crate only recognizes the [RFC 2822] [name-addr] and
+[addr-spec] productions, i.e., things of the form:
+
+ [name-addr]: https://tools.ietf.org/html/rfc2822#section-3.4
+ [addr-spec]: https://tools.ietf.org/html/rfc2822#section-3.4.1
+
+```
+Name (Comment) <email@example.org>
+```
+
+and
+
+```
+email@example.org
+```
+
+Although the above appear simple to parse, [RFC 2822]'s whitespace and
+comment rules are rather complex. This crate implements the whole
+grammar.
+
+As an extension, in addition to ASCII, we also recognize all UTF-8
+text.
+
+Further, we also allow dots in the name-addr Name. That is:
+
+```
+Professor Pippy P. Poopypants <pippy@jerome-horwitz.k12.oh.us>
+```
+
+is recognized. But [RFC 2822] strictly requires that the name be
+quoted:
+
+```
+"Professor Pippy P. Poopypants" <pippy@jerome-horwitz.k12.oh.us>
+```
+
+
+This crate does not (yet) implement the new [RFC 5322].
+
+ [RFC 5322]: https://tools.ietf.org/html/rfc5322
diff --git a/rfc2822/build.rs b/rfc2822/build.rs
new file mode 100644
index 00000000..24051d93
--- /dev/null
+++ b/rfc2822/build.rs
@@ -0,0 +1,11 @@
+extern crate lalrpop;
+
+// Never read (hence the dead_code allow); embedded so that this script is rerun if any of these files change:
+#[allow(dead_code)]
+const SOURCE: [ &'static str; 1 ]
+ = [ include_str!("src/grammar.lalrpop"),
+ ];
+
+fn main() {
+ lalrpop::process_root().unwrap(); // unwrap() panics if LALRPOP returns an error.
+}
diff --git a/rfc2822/src/component.rs b/rfc2822/src/component.rs
new file mode 100644
index 00000000..808f497f
--- /dev/null
+++ b/rfc2822/src/component.rs
@@ -0,0 +1,212 @@
+/// A UserID value typically looks something like:
+///
+/// Text (Comment) <name@example.org>
+///
+/// That is, it contains three components: a text string, a comment,
+/// and an email address.
+///
+/// The actual format allows for lots of interleaved comments and
+/// multiple texts. Thus, when parsing we build up a vector of
+/// Components in the order that they were encountered.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum Component {
+ /// A text string.
+ Text(String),
+ /// A comment.
+ ///
+ /// The outermost parens are removed. That is, if the comment is:
+ /// "(foo(bar)bam)", then "foo(bar)bam" is stored.
+ Comment(String),
+ /// An email address.
+ Address(String),
+ /// White space (e.g., what the grammar's FWS production evaluates to).
+ WS,
+}
+
+impl From<Component> for Vec<Component> {
+ fn from(c: Component) -> Self {
+ vec![c] // A one-element vector holding just `c`.
+ }
+}
+
+impl From<Component> for Option<Vec<Component>> {
+ fn from(c: Component) -> Self {
+ Some(vec![c]) // Always `Some`: a one-element vector holding just `c`.
+ }
+}
+
+// Appends the `Component`s to the vector `v`, in order. When `v` ends
+// in a WS and the next argument starts with a WS, one WS is dropped.
+// The Components can be anything that can be turned into an
+// Option<Vec<Component>>. This currently includes `Component`,
+// `Vec<Component>`, and `Option<Vec<Component>>` (`None` adds nothing).
+macro_rules! components_concat_into {
+ ( $v:expr, $c:expr ) => {{
+ let v: &mut Vec<Component> = $v;
+ let c : Option<Vec<Component>> = $c.into();
+ if let Some(mut c) = c {
+ // If v ends in a WS and c starts with a WS, then collapse
+ // them by dropping v's trailing WS (c's leading WS is kept).
+ if destructures_to!(Some(Component::WS) = v.last())
+ && destructures_to!(Some(Component::WS) = c.first())
+ {
+ v.pop();
+ }
+ v.append(&mut c);
+ }
+ }};
+ ( $v:expr, $car:expr, $($cdr:expr),* ) => {{ // Same handling for $car, then recurse on the rest.
+ let v: &mut Vec<Component> = $v;
+ let car : Option<Vec<Component>> = $car.into();
+ if let Some(mut car) = car {
+ if destructures_to!(Some(Component::WS) = v.last())
+ && destructures_to!(Some(Component::WS) = car.first())
+ {
+ v.pop();
+ }
+ v.append(&mut car)
+ }
+ components_concat_into!(v, $($cdr),*);
+ }};
+}
+
+// Collects the `Component`s into a new vector and evaluates to it.
+// The arguments are appended in order using components_concat_into!.
+// The Components can be anything that can be turned into an
+// Option<Vec<Component>>. This currently includes `Component`,
+// `Vec<Component>`, and `Option<Vec<Component>>`.
+macro_rules! components_concat {
+ ( $( $args:expr ),*) => {{
+ let mut v : Vec<Component> = Vec::new();
+ components_concat_into!(&mut v, $($args),*);
+ v
+ }};
+}
+
+// Kills at most one leading (if `left`) and/or one trailing (if `right`)
+// `Component::WS` of `v`. A `None` input yields an empty vector.
+pub(crate) fn components_kill_ws(v: Option<Vec<Component>>,
+ left: bool, right: bool)
+ -> Vec<Component>
+{
+ tracer!(::TRACE, "components_kill_ws");
+ t!("v: {:?}, left: {}, right: {}", v, left, right);
+
+ let v = if let Some(mut v) = v {
+ if v.len() > 0 && right { // The trailing end is handled first.
+ let mut kill = false;
+ if let Component::WS = v[v.len() - 1] {
+ kill = true;
+ }
+ if kill {
+ v.pop();
+ }
+ }
+ if v.len() > 0 && left { // Re-checked: the pop above may have emptied v.
+ let mut kill = false;
+ if let Component::WS = v[0] {
+ kill = true;
+ }
+ if kill {
+ v.remove(0);
+ }
+ }
+ v
+ } else {
+ vec![] // `None` input.
+ };
+ t!("=> {:?}", v);
+ v
+}
+
+// Merge the components in the vector: neighboring Texts are joined (a WS between two Texts becomes ' '), and a WS following a WS is dropped.
+pub(crate) fn components_merge(components: Vec<Component>)
+ -> Vec<Component>
+{
+ tracer!(::TRACE, "components_merge", 0);
+ t!("{:?}", components);
+
+ let mut iter = components.into_iter();
+ let mut components = vec![]; // Shadows the input; this is the output vector.
+
+ let mut left = if let Some(left) = iter.next() {
+ left
+ } else {
+ return components;
+ };
+ let mut middleo = iter.next();
+ let mut righto = iter.next();
+
+ while let Some(mut middle) = middleo { // Slide a (left, middle, right) window over the input.
+ enum Kill {
+ None,
+ Middle,
+ MiddleRight,
+ Right,
+ };
+ let mut kill = Kill::None;
+
+ match (&mut left, &mut middle, righto.as_mut()) {
+ (Component::Text(ref mut l),
+ Component::Text(ref mut m),
+ _) => {
+ t!("Merging '{}' and '{}'", l, m);
+ l.push_str(m);
+ kill = Kill::Middle;
+ },
+
+ (Component::Text(ref mut l),
+ Component::WS,
+ Some(Component::Text(ref mut r))) => {
+ t!("Merging '{}', WS and '{}'", l, r);
+ l.push(' ');
+ l.push_str(r);
+ kill = Kill::MiddleRight;
+ },
+ (_,
+ Component::WS,
+ Some(Component::WS)) => {
+ // This can happen when we have a local-part of the
+ // following form:
+ //
+ // (comment) foo (comment)
+ //
+ // The local-part is produced by the dot_atom_left
+ // production, which puts the dot_atom_text (foo) to
+ // the right:
+ //
+ // COMMENT WS WS COMMENT TEXT
+ kill = Kill::Right;
+ },
+ _ => (),
+ }
+
+ match kill {
+ Kill::Middle => { // Middle was folded into left; left stays put.
+ middleo = righto;
+ righto = iter.next();
+ }
+ Kill::MiddleRight => { // Middle and right were folded into left.
+ middleo = iter.next();
+ righto = iter.next();
+ }
+ Kill::Right => { // Drop right; keep left and middle, fetch a new right.
+ middleo = Some(middle);
+ righto = iter.next();
+ }
+ Kill::None => { // Nothing merged: emit left and advance the window by one.
+ components.push(left);
+ left = middle;
+ middleo = righto;
+ righto = iter.next();
+ }
+ }
+ }
+
+ components.push(left);
+ if let Some(middle) = middleo { // NOTE(review): looks unreachable; the loop above only exits once middleo is None.
+ components.push(middle);
+ }
+
+ components
+}
diff --git a/rfc2822/src/grammar.lalrpop b/rfc2822/src/grammar.lalrpop
new file mode 100644
index 00000000..f20735ed
--- /dev/null
+++ b/rfc2822/src/grammar.lalrpop
@@ -0,0 +1,698 @@
+// -*- mode: Rust; -*-
+use Error;
+
+use strings::{
+ strings_flatten_into,
+ strings_flatten2,
+ strings_flatten,
+};
+use component::{
+ Component,
+ components_kill_ws,
+ components_merge,
+};
+use lexer;
+use lexer::Token;
+
+grammar<'input>;
+
+// RFC 4880 says:
+//
+// 5.11. User ID Packet (Tag 13)
+//
+// A User ID packet consists of UTF-8 text that is intended to represent
+// the name and email address of the key holder. By convention, it
+// includes an RFC 2822 [RFC2822] mail name-addr, but there are no
+// restrictions on its content.
+//
+// At least today, the convention is more along the lines of RFC
+// 2822's mailbox instead of its name-addr. The only difference is
+// that the mailbox production allows for a bare email address i.e.,
+// one without angle brackets whereas the name-addr production
+// requires angle brackets.
+//
+// A further convention is an ssh-host-uri production:
+//
+// ssh-host-uri = "ssh://" dns-hostname
+
+
+CRLF: () = {
+ CR LF
+}
+
+// text = %d1-9 / ; Characters excluding CR and LF
+// %d11 /
+// %d12 /
+// %d14-127 /
+// obs-text
+text : Token<'input> = {
+ WSP,
+ NO_WS_CTL,
+ specials,
+ OTHER,
+}
+
+// specials = "(" / ")" / ; Special characters used in
+// "<" / ">" / ; other parts of the syntax
+// "[" / "]" /
+// ":" / ";" /
+// "@" / "\" /
+// "," / "." /
+// DQUOTE
+specials : Token<'input> = {
+ LPAREN,
+ RPAREN,
+ LANGLE,
+ RANGLE,
+ LBRACKET,
+ RBRACKET,
+ COLON,
+ SEMICOLON,
+ AT,
+ BACKSLASH,
+ COMMA,
+ DOT,
+ DQUOTE,
+};
+
+
+// 3.2.2. Quoted characters
+
+// quoted-pair = ("\" text) / obs-qp
+//
+// In RFC 2822, text is a single character and the BACKSLASH is
+// followed by exactly one character. As an optimization, our lexer
+// groups runs of 'text' characters into a single token, Token::OTHER.
+// Since a quoted pair can always be followed by a run of OTHER
+// characters, the semantics are preserved.
+quoted_pair : Token<'input> = {
+ BACKSLASH <text>,
+}
+
+// 3.2.3. Folding white space and comments
+
+// Folding white space
+//
+// FWS = ([*WSP CRLF] 1*WSP) / ;
+// obs-FWS
+//
+// Runs of FWS, comment or CFWS that occur between lexical tokens in
+// a structured field header are semantically interpreted as a
+// single space character.
+#[inline]
+FWS : Component = {
+ (WSP* CRLF)? WSP+ => Component::WS,
+}
+
+// ctext = NO-WS-CTL / ; Non white space controls
+// %d33-39 / ; The rest of the US-ASCII
+// %d42-91 / ; characters not including "(",
+// %d93-126 ; ")", or "\"
+ctext : Token<'input> = {
+ NO_WS_CTL,
+
+ // LPAREN,
+ // RPAREN,
+ LANGLE,
+ RANGLE,
+ LBRACKET,
+ RBRACKET,
+ COLON,
+ SEMICOLON,
+ AT,
+ // BACKSLASH,
+ COMMA,
+ DOT,
+ DQUOTE,
+
+ OTHER,
+}
+
+// ccontent = ctext / quoted-pair / comment
+ccontent : String = {
+ <c:ctext> => c.to_string(),
+ <c:quoted_pair> => c.to_string(),
+ <c:comment> => {
+ let mut s = String::new();
+ s.push('(');
+ if let Component::Comment(comment) = c {
+ s.push_str(&comment[..]);
+ } else {
+ panic!("Expected a Component::Comment");
+ }
+ s.push(')');
+ s
+ },
+}
+
+// comment = "(" *([FWS] ccontent) [FWS] ")"
+pub(crate) Comment : Component = {
+ <comment>
+}
+
+comment : Component = {
+ LPAREN <c:(<FWS?> <ccontent>)*> <d:FWS?> RPAREN => {
+ let mut s = strings_flatten2(
+ c.into_iter().map(|(fws, c)| (fws.is_some(), c)), " ");
+
+ if d.is_some() {
+ s.push(' ');
+ }
+
+ Component::Comment(s)
+ },
+}
+
+// CFWS = *([FWS] comment) (([FWS] comment) / FWS)
+pub(crate) Cfws : Vec<Component> = {
+ <c:CFWS> => {
+ components_merge(c)
+ }
+}
+
+CFWS : Vec<Component> = {
+ // <c:(FWS? <comment>)*> FWS? <d:comment> => ...,
+ // <c:(FWS? <comment>)*> FWS => ...,
+
+ // The following is equivalent to the above, but the actions are a
+ // bit simpler.
+ <c:(<FWS?> <comment>)+> => {
+ let v : Vec<Component> = c.into_iter()
+ .map(|(w, c)| {
+ if let Some(w) = w {
+ vec![w, c]
+ } else {
+ vec![c]
+ }
+ })
+ .flatten()
+ .collect();
+ v
+ },
+ <c:(<FWS?> <comment>)*> <w2:FWS> => {
+ let mut v : Vec<Component> = c.into_iter()
+ .map(|(w, c)| {
+ if let Some(w) = w {
+ vec![w, c]
+ } else {
+ vec![c]
+ }
+ })
+ .flatten()
+ .collect();
+ v.push(w2);
+ v
+ }
+}
+
+// 3.2.4. Atom
+
+// atext = ALPHA / DIGIT / ; Any character except controls,
+// "!" / "#" / ; SP, and specials.
+// "$" / "%" / ; Used for atoms
+// "&" / "'" /
+// "*" / "+" /
+// "-" / "/" /
+// "=" / "?" /
+// "^" / "_" /
+// "`" / "{" /
+// "|" / "}" /
+// "~"
+//
+// As an optimization the lexer collects atexts, i.e., Token::OTHER is
+// 1*atext.
+atext_plus : String = {
+ <a:OTHER> => {
+ let a = a.to_string();
+ assert!(a.len() > 0);
+ a
+ },
+}
+
+
+// The display-name in a name-addr production often includes a ., but
+// is not quoted. The RFC even recommends supporting this variation.
+other_or_dot : String = {
+ <a:OTHER> => a.to_string(),
+ <d:DOT> => d.to_string(),
+}
+
+atext_dot_plus : String = {
+ <a:other_or_dot+> => strings_flatten(a.into_iter(), ""),
+}
+
+// atom = [CFWS] 1*atext [CFWS]
+//
+// "Both atom and dot-atom are interpreted as a single unit, comprised
+// of the string of characters that make it up. Semantically, the
+// optional comments and FWS surrounding the rest of the characters
+// are not part of the atom"
+pub(crate) Atom : Vec<Component> = {
+ <a:atom> => components_merge(a),
+}
+
+atom : Vec<Component> = {
+ <c1:CFWS?> <a:atext_dot_plus> <c2:CFWS?> =>
+ components_concat!(
+ components_kill_ws(c1, false, true),
+ Component::Text(a),
+ components_kill_ws(c2, true, false)),
+}
+
+// See the phrase production for why this variant of the 'atom'
+// production exists, and why the 'CFWS?'es are not included.
+atom_prime : Component = {
+ <a:atext_dot_plus> => Component::Text(a),
+}
+
+// dot-atom = [CFWS] dot-atom-text [CFWS]
+//
+// "Both atom and dot-atom are interpreted as a single unit, comprised
+// of the string of characters that make it up. Semantically, the
+// optional comments and FWS surrounding the rest of the characters
+// are not part of the atom"
+pub(crate) DotAtom : Vec<Component> = {
+ <d:dot_atom> => components_merge(d),
+}
+
+dot_atom : Vec<Component> = {
+ <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
+ components_concat!(
+ components_kill_ws(c1, false, true),
+ a,
+ components_kill_ws(c2, true, false)),
+}
+
+// A variant of dot_atom that places all comments to the left.
+dot_atom_left : Vec<Component> = {
+ <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
+ components_concat!(
+ components_kill_ws(
+ Some(components_concat!(c1, c2)), false, true),
+ a),
+}
+
+// A variant of dot_atom that places all comments to the right.
+dot_atom_right : Vec<Component> = {
+ <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
+ components_concat!(
+ a,
+ components_kill_ws(
+ Some(components_concat!(c1, c2)), true, false)),
+}
+
+// dot-atom-text = 1*atext *("." 1*atext)
+dot_atom_text : Component = {
+ <v:atext_plus> <w:(DOT <atext_plus>)*> => {
+ let mut v = v;
+ if w.len() > 0 {
+ v.push('.');
+ }
+ Component::Text(
+ strings_flatten_into(v, w.into_iter(), "."))
+ },
+}
+
+// 3.2.5. Quoted strings
+
+// qtext = NO-WS-CTL / ; Non white space controls
+// %d33 / ; The rest of the US-ASCII
+// %d35-91 / ; characters not including "\"
+// %d93-126 ; or the quote character
+qtext : Token<'input> = {
+ NO_WS_CTL,
+
+ LPAREN,
+ RPAREN,
+ LANGLE,
+ RANGLE,
+ LBRACKET,
+ RBRACKET,
+ COLON,
+ SEMICOLON,
+ AT,
+ // BACKSLASH,
+ COMMA,
+ DOT,
+ // DQUOTE,
+
+ OTHER,
+}
+
+// qcontent = qtext / quoted-pair
+qcontent : Component = {
+ <c:qtext> => Component::Text(c.to_string()),
+ <c:quoted_pair> => Component::Text(c.to_string()),
+}
+
+// quoted-string = [CFWS]
+// DQUOTE *([FWS] qcontent) [FWS] DQUOTE
+// [CFWS]
+pub(crate) QuotedString : Vec<Component> = {
+ <q:quoted_string> => components_merge(q),
+}
+
+quoted_string : Vec<Component> = {
+ <c1:CFWS?> DQUOTE <c:(<FWS?> <qcontent>)*> <d:FWS?> DQUOTE <c2:CFWS?> => {
+ // Make sure any leading and trailing whitespace *inside* the
+ // quotes is turned into Component::Text.
+ components_concat!(
+ // c1 is an Option<Vec<Component>>.
+ c1,
+ // c is a Vec<(Option<Component>, Component)>. Turn it
+ // into a Vec<Component>.
+ c.into_iter()
+ .map(|(fws, c)| {
+ if let Some(_) = fws {
+ vec![Component::Text(" ".to_string()), c]
+ } else {
+ vec![c]
+ }
+ })
+ .flatten()
+ .collect::<Vec<Component>>(),
+ // d is an Option<Component>, turn it into a
+ // Option<Vec<Component>>.
+ d.map(|_| vec![Component::Text(" ".to_string())]),
+ c2)
+ },
+}
+
+// Variant of quoted_string that moves all comments to the left.
+quoted_string_left : Vec<Component> = {
+ <c1:CFWS?> DQUOTE <c:(<FWS?> <qcontent>)*> <d:FWS?> DQUOTE <c2:CFWS?> => {
+ // Make sure any leading and trailing whitespace *inside* the
+ // quotes is turned into Component::Text.
+ components_concat!(
+ // c1 and c2 are each an Option<Vec<Component>>; concatenate them and drop a trailing WS.
+ components_kill_ws(Some(components_concat!(c1, c2)), false, true),
+ // c is a Vec<(Option<Component>, Component)>. Turn it
+ // into a Vec<Component>.
+ c.into_iter()
+ .map(|(fws, c)| {
+ if let Some(_) = fws {
+ vec![Component::Text(" ".to_string()), c]
+ } else {
+ vec![c]
+ }
+ })
+ .flatten()
+ .collect::<Vec<Component>>(),
+ // d is an Option<Component>, turn it into an
+ // Option<Vec<Component>>.
+ d.map(|_| vec![Component::Text(" ".to_string())]))
+ },
+}
+
+// See the phrase production for why this variant of the 'quoted_string'
+// production exists, and why the 'CFWS?'es are not included.
+quoted_string_prime : Vec<Component> = {
+ DQUOTE <c:(<FWS?> <qcontent>)*> <d:FWS?> DQUOTE => {
+ // Make sure any leading and trailing whitespace *inside* the
+ // quotes is turned into Component::Text.
+ components_concat!(
+ // c is a Vec<(Option<Component>, Component)>. Turn it
+ // into a Vec<Component>.
+ c.into_iter()
+ .map(|(fws, c)| {
+ if let Some(_) = fws {
+ vec![Component::Text(" ".to_string()), c]
+ } else {
+ vec![c]
+ }
+ })
+ .flatten()
+ .collect::<Vec<Component>>(),
+ // d is an Option<Component>, turn it into a
+ // Option<Vec<Component>>.
+ d.map(|_| vec![Component::Text(" ".to_string())]))
+ },
+}
+
+// 3.2.6. Miscellaneous tokens
+
+// word = atom / quoted-string
+pub(crate) Word : Vec<Component> = {
+ <w:word> => components_merge(w),
+}
+
+word : Vec<Component> = {
+ atom,
+ quoted_string,
+}
+
+// phrase = 1*word / obs-phrase
+
+pub(crate) Phrase : Vec<Component> = {
+ <p:phrase> => components_merge(p),
+}
+
+// phrase : String = {
+// <v:word+> => strings_flatten(v, ""),
+// }
+//
+// Note: consider the following parse tree:
+//
+// phrase
+// / \
+// word word
+// / \
+// atom atom
+// / | \ / | \
+// CFWS+? atext+ CFWS? CFWS+? atext+ CFWS?
+//
+// This has an ambiguity! Does a CFWS immediately after the first
+// atext+ belong to the first atom or the second? And, if there are
+// no CFWSes, how do we split the atext?
+//
+// To avoid these problems, we modify the grammar as presented in the
+// RFC as follows:
+atom_or_quoted_string : Vec<Component> = {
+ <a:atom_prime> <r:cfws_or_quoted_string?> => {
+ // Note: it's not possible to have multiple atoms in a row.
+ // The following:
+ //
+ // foo bar
+ //
+ // is 'atom_prime CFWS atom_prime'.
+
+ components_concat!(a, r)