summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Neal H. Walfield <neal@pep.foundation> 2019-04-30 09:11:57 +0200
committer: Neal H. Walfield <neal@pep.foundation> 2019-04-30 09:11:57 +0200
commit: 9f0a3e7e043024ea2ee2ef546c1832ac8a579b4e (patch)
tree: 2cc9d2ad032bfd6197e16044cc6969b3db7ced2e
parent: d6eb647ef27bff222bb1bc04b8a7307a864a63cc (diff)
rfc2822: Improve comments
-rw-r--r-- rfc2822/src/grammar.lalrpop 33
-rw-r--r-- rfc2822/src/lexer.rs 38
2 files changed, 17 insertions, 54 deletions
diff --git a/rfc2822/src/grammar.lalrpop b/rfc2822/src/grammar.lalrpop
index e400a0aa..4cc571f3 100644
--- a/rfc2822/src/grammar.lalrpop
+++ b/rfc2822/src/grammar.lalrpop
@@ -33,7 +33,8 @@ grammar<'input>;
// A further convention is an ssh-host-uri production:
//
// ssh-host-uri = "ssh://" dns-hostname
-
+//
+// Support for this should be added in the future.
CRLF: () = {
CR LF
@@ -83,7 +84,7 @@ specials : Token<'input> = {
// quoted-pair = ("\" text) / obs-qp
//
-// In RFC 2822, text is a single character and the BACKSLAH is
+// In RFC 2822, text is a single character and the BACKSLASH is
// followed by exactly one character. As an optimization, our lexer
// groups runs of 'text' characters into a single token, Token::OTHER.
// Since a quoted pair can always be followed by a run of OTHER
@@ -272,8 +273,8 @@ atom : Vec<Component> = {
c2),
}
-// See the phrase production for this variant of the 'atom' production
-// exists, and why the 'CFWS?'es are not included.
+// See the phrase production for why this variant of the 'atom'
+// production exists, and why the 'CFWS?'es are not included.
atom_prime : Component = {
<a:atext_dot_plus> => Component::Text(a),
}
@@ -544,18 +545,18 @@ pub(crate) NameAddr : Vec<Component> = {
}
// The display_name ends in an optional CFWS and the angle_addr starts
-// with one. This causes an ambiguity. The angle_addr_prime
-// production removes the optional leading CFWS non-terminal.
-//
-// But, this creates a small problem. Consider:
+// with one. This causes an ambiguity. We resolve the ambiguity by
+// introducing the angle_addr_prime production, which doesn't match a
+// leading CFWS non-terminal. But, this creates another small
+// problem. Consider:
//
// " <email@example.org>"
//
-// This is: [CFWS angle-addr]. Now, we are using angle-addr-prime so
-// that it won't match leading CFWSes, because they are matched by
-// display-name. But display-name doesn't match in this case because
-// there are no phrases, and it requires at least on phrase! The
-// second rule below covers this edge case.
+// This is: [CFWS angle-addr-prime]. The CFWS isn't folded into the
+// angle-addr-prime to fix the aforementioned ambiguity. But it also
+// doesn't reduce to a display-name, because there are no phrases, and
+// display-name requires at least one phrase! Thus, we special case
+// this.
name_addr : Vec<Component> = {
<n:display_name?> <a:angle_addr_prime> =>
components_concat!(n, a),
@@ -609,7 +610,7 @@ addr_spec : Vec<Component> = {
//
// is valid (it's foo@bar.com).
- // The local part may start with commends and the domain part
+ // The local part may start with comments and the domain part
// may end with comments.
let local_part = l.pop().expect("empty local_part");
let domain = d.remove(0);
@@ -675,7 +676,7 @@ domain_literal : Vec<Component> = {
})
.flatten()
.collect::<Vec<Component>>(),
- // d is an Option<Component>, turn it into a
+ // d is an Option<Component>, turn it into an
// Option<Vec<Component>>.
d.map(|x| vec![x]),
Component::Text("]".into()),
@@ -700,7 +701,7 @@ domain_literal_right : Vec<Component> = {
})
.flatten()
.collect::<Vec<Component>>(),
- // d is an Option<Component>, turn it into a
+ // d is an Option<Component>, turn it into an
// Option<Vec<Component>>.
d.map(|x| vec![x]),
Component::Text("]".into()),
diff --git a/rfc2822/src/lexer.rs b/rfc2822/src/lexer.rs
index 5d7eb049..eadd4aa2 100644
--- a/rfc2822/src/lexer.rs
+++ b/rfc2822/src/lexer.rs
@@ -85,44 +85,6 @@ impl<'input> Lexer<'input> {
// 3.2.1. Primitive Tokens
-// The symbols. The default tokenizer returns &str, but we want
-// chars. So, we need to do a little dance.
-//
-// match {
-// // All unicode white space.
-// // 2.2.2. says that whitespace is only ' ' and '\t'.
-// r" \t" => WSP_TOKEN,
-//
-// r"(?x)
-// [\x01-\x08 # %d1-8 /
-// \x0b # %d11 /
-// \x0c # %d12 /
-// \x0e-\x1f # %d14-31 /
-// \x7f # %d127
-// ]" => NO_WS_CTL_TOKEN,
-//
-// "\r" => CR_TOKEN,
-// "\n" => LF_TOKEN,
-//
-// // specials
-// "(" => LPAREN_TOKEN,
-// ")" => RPAREN_TOKEN,
-// "<" => LANGLE_TOKEN,
-// ">" => RANGLE_TOKEN,
-// "[" => LBRACKET_TOKEN,
-// "]" => RBRACKET_TOKEN,
-// ":" => COLON_TOKEN,
-// ";" => SEMICOLON_TOKEN,
-// "@" => AT_TOKEN,
-// "\\" => BACKSLASH_TOKEN,
-// "," => COMMA_TOKEN,
-// "." => DOT_TOKEN,
-// "\"" => DQUOTE_TOKEN,
-// } else {
-// // Everything else.
-// r"." => OTHER_TOKEN
-// }
-
impl<'input> Iterator for Lexer<'input> {
type Item = LexerItem<Token<'input>, usize, Error>;