rfc2822: Improve comments

author: Neal H. Walfield <neal@pep.foundation> 2019-04-30 09:11:57 +0200
committer: Neal H. Walfield <neal@pep.foundation> 2019-04-30 09:11:57 +0200
commit: 9f0a3e7e043024ea2ee2ef546c1832ac8a579b4e (patch)
tree: 2cc9d2ad032bfd6197e16044cc6969b3db7ced2e
parent: d6eb647ef27bff222bb1bc04b8a7307a864a63cc (diff)
2 files changed, 17 insertions, 54 deletions
diff --git a/rfc2822/src/grammar.lalrpop b/rfc2822/src/grammar.lalrpop
index e400a0aa..4cc571f3 100644
--- a/rfc2822/src/grammar.lalrpop
+++ b/rfc2822/src/grammar.lalrpop
@@ -33,7 +33,8 @@ grammar<'input>;
 // A further convention is an ssh-host-uri production:
 //
 //   ssh-host-uri = "ssh://" dns-hostname
-
+//
+// Support for this should be added in the future.
 
 CRLF: () = {
     CR LF
@@ -83,7 +84,7 @@ specials : Token<'input> = {
 
 // quoted-pair     =       ("\" text) / obs-qp
 //
-// In RFC 2822, text is a single character and the BACKSLAH is
+// In RFC 2822, text is a single character and the BACKSLASH is
 // followed by exactly one character.  As an optimization, our lexer
 // groups runs of 'text' characters into a single token, Token::OTHER.
 // Since a quoted pair can always be followed by a run of OTHER
@@ -272,8 +273,8 @@ atom : Vec<Component> = {
             c2),
 }
 
-// See the phrase production for this variant of the 'atom' production
-// exists, and why the 'CFWS?'es are not included.
+// See the phrase production for why this variant of the 'atom'
+// production exists, and why the 'CFWS?'es are not included.
 atom_prime : Component = {
     <a:atext_dot_plus> => Component::Text(a),
 }
@@ -544,18 +545,18 @@ pub(crate) NameAddr : Vec<Component> = {
 }
 
 // The display_name ends in an optional CFWS and the angle_addr starts
-// with one.  This causes an ambiguity.  The angle_addr_prime
-// production removes the optional leading CFWS non-terminal.
-//
-// But, this creates a small problem.  Consider:
+// with one.  This causes an ambiguity.  We resolve the ambiguity by
+// introducing the angle_addr_prime production, which doesn't match a
+// leading CFWS non-terminal.  But, this creates another small
+// problem.  Consider:
 //
 //   " <email@example.org>"
 //
-// This is: [CFWS angle-addr].  Now, we are using angle-addr-prime so
-// that it won't match leading CFWSes, because they are matched by
-// display-name.  But display-name doesn't match in this case because
-// there are no phrases, and it requires at least on phrase!  The
-// second rule below covers this edge case.
+// This is: [CFWS angle-addr-prime].  To resolve the aforementioned
+// ambiguity, the CFWS is not folded into angle-addr-prime.  But it
+// also doesn't reduce to a display-name, because there are no
+// phrases, and display-name requires at least one phrase!  Thus, we
+// special-case this.
 name_addr : Vec<Component> = {
     <n:display_name?> <a:angle_addr_prime> =>
         components_concat!(n, a),
@@ -609,7 +610,7 @@ addr_spec : Vec<Component> = {
         //
         // is valid (it's foo@bar.com).
 
-        // The local part may start with commends and the domain part
+        // The local part may start with comments and the domain part
         // may end with comments.
         let local_part = l.pop().expect("empty local_part");
         let domain = d.remove(0);
@@ -675,7 +676,7 @@ domain_literal : Vec<Component> = {
                 })
                 .flatten()
                 .collect::<Vec<Component>>(),
-            // d is an Option<Component>, turn it into a
+            // d is an Option<Component>, turn it into an
             // Option<Vec<Component>>.
             d.map(|x| vec![x]),
             Component::Text("]".into()),
@@ -700,7 +701,7 @@ domain_literal_right : Vec<Component> = {
                 })
                 .flatten()
                 .collect::<Vec<Component>>(),
-            // d is an Option<Component>, turn it into a
+            // d is an Option<Component>, turn it into an
             // Option<Vec<Component>>.
             d.map(|x| vec![x]),
             Component::Text("]".into()),
diff --git a/rfc2822/src/lexer.rs b/rfc2822/src/lexer.rs
index 5d7eb049..eadd4aa2 100644
--- a/rfc2822/src/lexer.rs
+++ b/rfc2822/src/lexer.rs
@@ -85,44 +85,6 @@ impl<'input> Lexer<'input> {
 
 // 3.2.1. Primitive Tokens
 
-// The symbols.  The default tokenizer returns &str, but we want
-// chars.  So, we need to do a little dance.
-//
-// match {
-//     // All unicode white space.
-//     // 2.2.2. says that whitespace is only ' ' and '\t'.
-//     r" \t" => WSP_TOKEN,
-// 
-//     r"(?x)
-//         [\x01-\x08         # %d1-8 /
-//          \x0b              # %d11 /
-//          \x0c              # %d12 /
-//          \x0e-\x1f         # %d14-31 /
-//          \x7f              # %d127
-//         ]" => NO_WS_CTL_TOKEN,
-// 
-//     "\r" => CR_TOKEN,
-//     "\n" => LF_TOKEN,
-// 
-//     // specials
-//     "(" => LPAREN_TOKEN,
-//     ")" => RPAREN_TOKEN,
-//     "<" => LANGLE_TOKEN,
-//     ">" => RANGLE_TOKEN,
-//     "[" => LBRACKET_TOKEN,
-//     "]" => RBRACKET_TOKEN,
-//     ":" => COLON_TOKEN,
-//     ";" => SEMICOLON_TOKEN,
-//     "@" => AT_TOKEN,
-//     "\\" => BACKSLASH_TOKEN,
-//     "," => COMMA_TOKEN,
-//     "." => DOT_TOKEN,
-//     "\"" =>  DQUOTE_TOKEN,
-// } else {
-//     // Everything else.
-//     r"." =>  OTHER_TOKEN
-// }
-
 impl<'input> Iterator for Lexer<'input> {
     type Item = LexerItem<Token<'input>, usize, Error>;
author	Neal H. Walfield <neal@pep.foundation>	2019-04-30 09:11:57 +0200
committer	Neal H. Walfield <neal@pep.foundation>	2019-04-30 09:11:57 +0200
commit	9f0a3e7e043024ea2ee2ef546c1832ac8a579b4e (patch)
tree	2cc9d2ad032bfd6197e16044cc6969b3db7ced2e
parent	d6eb647ef27bff222bb1bc04b8a7307a864a63cc (diff)