rfc2822: Use quickcheck. Fix edge cases.

- Use quickcheck to test the parser. - To fix a number of edge cases that it uncovered, don't discard Component::WS, and compress them in components_merge more carefully intelligently. - Update the test vectors, and fix the edge cases.
author: Neal H. Walfield <neal@pep.foundation> 2019-04-29 17:58:46 +0200
committer: Neal H. Walfield <neal@pep.foundation> 2019-04-29 18:06:33 +0200
commit: d6eb647ef27bff222bb1bc04b8a7307a864a63cc (patch)
tree: 1cf2d51da320c0724adb814ea4265dcc7f10fda7 /rfc2822
parent: 1fa018acc90194e1cd1ddf2bd0c6706000a3bc9e (diff)
5 files changed, 1265 insertions, 79 deletions
diff --git a/rfc2822/Cargo.toml b/rfc2822/Cargo.toml
index 0b149513..20849a01 100644
--- a/rfc2822/Cargo.toml
+++ b/rfc2822/Cargo.toml
@@ -23,5 +23,10 @@ maintenance = { status = "actively-developed" }
 failure = "0.1.2"
 lalrpop-util = "0.16"
 
+[dev-dependencies]
+lazy_static = "1.3"
+quickcheck = "0.8"
+rand = "0.6"
+
 [build-dependencies]
 lalrpop = "0.16"
diff --git a/rfc2822/src/component.rs b/rfc2822/src/component.rs
index 808f497f..f7b42e4b 100644
--- a/rfc2822/src/component.rs
+++ b/rfc2822/src/component.rs
@@ -83,42 +83,6 @@ macro_rules! components_concat {
     }};
 }
 
-// Kills leading (`left`) and/or trailing (`right`) whitespace
-// (`Component::WS`).
-pub(crate) fn components_kill_ws(v: Option<Vec<Component>>,
-                                 left: bool, right: bool)
-    -> Vec<Component>
-{
-    tracer!(::TRACE, "components_kill_ws");
-    t!("v: {:?}, left: {}, right: {}", v, left, right);
-
-    let v = if let Some(mut v) = v {
-        if v.len() > 0 && right {
-            let mut kill = false;
-            if let Component::WS = v[v.len() - 1] {
-                kill = true;
-            }
-            if kill {
-                v.pop();
-            }
-        }
-        if v.len() > 0 && left {
-            let mut kill = false;
-            if let Component::WS = v[0] {
-                kill = true;
-            }
-            if kill {
-                v.remove(0);
-            }
-        }
-        v
-    } else {
-        vec![]
-    };
-    t!("=> {:?}", v);
-    v
-}
-
 // Merge the components in the vector.
 pub(crate) fn components_merge(components: Vec<Component>)
     -> Vec<Component>
@@ -142,7 +106,6 @@ pub(crate) fn components_merge(components: Vec<Component>)
             None,
             Middle,
             MiddleRight,
-            Right,
         };
         let mut kill = Kill::None;
 
@@ -163,9 +126,9 @@ pub(crate) fn components_merge(components: Vec<Component>)
                 l.push_str(r);
                 kill = Kill::MiddleRight;
             },
-            (_,
+            (Component::WS,
              Component::WS,
-             Some(Component::WS)) => {
+             _) => {
                 // This can happen when we have a local-part of the
                 // following form:
                 //
@@ -176,7 +139,13 @@ pub(crate) fn components_merge(components: Vec<Component>)
                 // the right:
                 //
                 //   COMMENT WS WS COMMENT TEXT
-                kill = Kill::Right;
+                //
+                // It is also possible to have:
+                //
+                //   WS WS COMMENT TEXT
+                //
+                // as CFWS can expand to just a WS.
+                kill = Kill::Middle;
             },
             _ => (),
         }
@@ -190,10 +159,6 @@ pub(crate) fn components_merge(components: Vec<Component>)
                 middleo = iter.next();
                 righto = iter.next();
             }
-            Kill::Right => {
-                middleo = Some(middle);
-                righto = iter.next();
-            }
             Kill::None => {
                 components.push(left);
                 left = middle;
diff --git a/rfc2822/src/grammar.lalrpop b/rfc2822/src/grammar.lalrpop
index f20735ed..e400a0aa 100644
--- a/rfc2822/src/grammar.lalrpop
+++ b/rfc2822/src/grammar.lalrpop
@@ -8,7 +8,6 @@ use strings::{
 };
 use component::{
     Component,
-    components_kill_ws,
     components_merge,
 };
 use lexer;
@@ -45,6 +44,10 @@ CRLF: () = {
 //                         %d12 /
 //                         %d14-127 /
 //                         obs-text
+pub(crate) Text : Token<'input> = {
+    text,
+}
+
 text : Token<'input> = {
     WSP,
     NO_WS_CTL,
@@ -99,6 +102,12 @@ quoted_pair : Token<'input> = {
 //   Runs of FWS, comment or CFWS that occur between lexical tokens in
 //   a structured field header are semantically interpreted as a
 //   single space character.
+
+// FWS can't be exported, because it uses inline.
+pub(crate) FWS_ : Component = {
+    FWS
+}
+
 #[inline]
 FWS : Component = {
    (WSP* CRLF)? WSP+ => Component::WS,
@@ -108,6 +117,10 @@ FWS : Component = {
 //                         %d33-39 /       ; The rest of the US-ASCII
 //                         %d42-91 /       ;  characters not including "(",
 //                         %d93-126        ;  ")", or "\"
+pub(crate) CText : Token<'input> = {
+    ctext
+}
+
 ctext : Token<'input> = {
     NO_WS_CTL,
 
@@ -254,9 +267,9 @@ pub(crate) Atom : Vec<Component> = {
 atom : Vec<Component> = {
     <c1:CFWS?> <a:atext_dot_plus> <c2:CFWS?> =>
         components_concat!(
-            components_kill_ws(c1, false, true),
+            c1,
             Component::Text(a),
-            components_kill_ws(c2, true, false)),
+            c2),
 }
 
 // See the phrase production for this variant of the 'atom' production
@@ -277,28 +290,19 @@ pub(crate) DotAtom : Vec<Component> = {
 
 dot_atom : Vec<Component> = {
     <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
-        components_concat!(
-            components_kill_ws(c1, false, true),
-            a,
-            components_kill_ws(c2, true, false)),
+        components_concat!(c1, a, c2),
 }
 
 // A variant of dot_atom that places all comments to the left.
 dot_atom_left : Vec<Component> = {
     <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
-        components_concat!(
-            components_kill_ws(
-                Some(components_concat!(c1, c2)), false, true),
-            a),
+        components_concat!(c1, c2, a),
 }
 
 // A variant of dot_atom that places all comments to the right.
 dot_atom_right : Vec<Component> = {
     <c1:CFWS?> <a:dot_atom_text> <c2:CFWS?> =>
-        components_concat!(
-            a,
-            components_kill_ws(
-                Some(components_concat!(c1, c2)), true, false)),
+        components_concat!(a, c1, c2),
 }
 
 // dot-atom-text   =       1*atext *("." 1*atext)
@@ -340,6 +344,10 @@ qtext : Token<'input> = {
 }
 
 // qcontent        =       qtext / quoted-pair
+pub(crate) QContent : Vec<Component> = {
+    <q:qcontent> => components_merge(vec![ q ]),
+}
+
 qcontent : Component = {
     <c:qtext> => Component::Text(c.to_string()),
     <c:quoted_pair> => Component::Text(c.to_string()),
@@ -359,6 +367,9 @@ quoted_string : Vec<Component> = {
         components_concat!(
             // c1 is an Option<Vec<Component>>.
             c1,
+            // If we have "" make sure we return Component::Text("")
+            // instead of nothing.
+            Component::Text("".into()),
             // c is a Vec<(Option<Component>, Component)>.  Turn it
             // into a Vec<Component>.
             c.into_iter()
@@ -385,7 +396,11 @@ quoted_string_left : Vec<Component> = {
         // quotes is turned into Component::Text.
         components_concat!(
             // c1 is an Option<Vec<Component>>.
-            components_kill_ws(Some(components_concat!(c1, c2)), false, true),
+            c1,
+            c2,
+            // If we have "" make sure we return Component::Text("")
+            // instead of nothing.
+            Component::Text("".into()),
             // c is a Vec<(Option<Component>, Component)>.  Turn it
             // into a Vec<Component>.
             c.into_iter()
@@ -411,6 +426,9 @@ quoted_string_prime : Vec<Component> = {
         // Make sure any leading and trailing whitespace *inside* the
         // quotes is turned into Component::Text.
         components_concat!(
+            // If we have "" make sure we return Component::Text("")
+            // instead of nothing.
+            Component::Text("".into()),
             // c is a Vec<(Option<Component>, Component)>.  Turn it
             // into a Vec<Component>.
             c.into_iter()
@@ -459,11 +477,11 @@ pub(crate) Phrase : Vec<Component> = {
 //                /                    \
 //           atom                        atom
 //       /    |    \                 /    |    \
-// CFWS+?   atext+   CFWS?     CFWS+?   atext+   CFWS?
+//  CFWS?   atext+   CFWS?      CFWS?   atext+   CFWS?
 //
 // This has an ambiguity!  Does a CFWS immediate after the first
 // atext+ belong to the first atom or the second?  And, if there are
-// no CFWSes, how do we split the atext?
+// no CFWSes, how do we split the atext+?
 //
 // To avoid these problems, we modify the grammar as presented in the
 // RFC as follows:
@@ -528,9 +546,22 @@ pub(crate) NameAddr : Vec<Component> = {
 // The display_name ends in an optional CFWS and the angle_addr starts
 // with one.  This causes an ambiguity.  The angle_addr_prime
 // production removes the optional leading CFWS non-terminal.
+//
+// But, this creates a small problem.  Consider:
+//
+//   " <email@example.org>"
+//
+// This is: [CFWS angle-addr].  Now, we are using angle-addr-prime so
+// that it won't match leading CFWSes, because they are matched by
+// display-name.  But display-name doesn't match in this case because
+// there are no phrases, and it requires at least on phrase!  The
+// second rule below covers this edge case.
 name_addr : Vec<Component> = {
     <n:display_name?> <a:angle_addr_prime> =>
         components_concat!(n, a),
+
+    <c:CFWS> <a:angle_addr_prime> =>
+        components_concat!(c, a),
 }
 
 
@@ -551,8 +582,12 @@ angle_addr_prime : Vec<Component> = {
 
 
 // display-name    =       phrase
+pub(crate) DisplayName : Vec<Component> = {
+    <d:display_name> => components_merge(d),
+}
+
 display_name : Vec<Component> = {
-    <p:phrase> => components_kill_ws(Some(p), true, true),
+    <p:phrase> => p,
 }
 
 // 3.4.1. Addr-spec specification
@@ -596,15 +631,23 @@ addr_spec : Vec<Component> = {
 }
 
 // local-part      =       dot-atom / quoted-string / obs-local-part
+pub(crate) LocalPart : Vec<Component> = {
+    <l:local_part> => components_merge(l),
+}
+
 local_part : Vec<Component> = {
     dot_atom_left,
     quoted_string_left,
 }
 
 // domain          =       dot-atom / domain-literal / obs-domain
+pub(crate) Domain : Vec<Component> = {
+    <d:domain> => components_merge(d),
+}
+
 domain : Vec<Component> = {
     dot_atom_right,
-    domain_literal,
+    domain_literal_right,
 }
 
 
@@ -640,8 +683,38 @@ domain_literal : Vec<Component> = {
     }
 }
 
+domain_literal_right : Vec<Component> = {
+    <c1:CFWS?> LBRACKET <c:(<FWS?> <dcontent>)*> <d:FWS?> RBRACKET <c2:CFWS?> => {
+        components_concat!(
+            Component::Text("[".into()),
+            // c is a Vec<(Option<Component>, Component)>.  Turn it
+            // into a Vec<Component>.
+            c.into_iter()
+                .map(|(fws, c)| {
+                    let c = Component::Text(c.to_string());
+                    if let Some(fws) = fws {
+                        vec![fws, c]
+                    } else {
+                        vec![c]
+                    }
+                })
+                .flatten()
+                .collect::<Vec<Component>>(),
+            // d is an Option<Component>, turn it into a
+            // Option<Vec<Component>>.
+            d.map(|x| vec![x]),
+            Component::Text("]".into()),
+            c1,
+            c2)
+    }
+}
+
 
 // dcontent        =       dtext / quoted-pair
+pub(crate) DContent : Vec<Component> = {
+    <d:dcontent> => components_merge(vec![ Component::Text(d.to_string()) ]),
+}
+
 dcontent : Token<'input> = {
     dtext,
     quoted_pair,
diff --git a/rfc2822/src/lib.rs b/rfc2822/src/lib.rs
index 0008a928..5f348814 100644
--- a/rfc2822/src/lib.rs
+++ b/rfc2822/src/lib.rs
@@ -1,6 +1,10 @@
 extern crate failure;
 extern crate lalrpop_util;
 
+#[cfg(test)] #[macro_use] extern crate lazy_static;
+#[cfg(test)] #[macro_use] extern crate quickcheck;
+#[cfg(test)] extern crate rand;
+
 use lalrpop_util::ParseError;
 
 #[macro_use] mod macros;
@@ -22,6 +26,9 @@ mod grammar;
 #[allow(unused_imports, dead_code)]
 mod grammar;
 
+#[cfg(test)]
+mod roundtrip;
+
 const TRACE : bool = false;
 
 pub type Result<T> = ::std::result::Result<T, Error>;
@@ -488,9 +495,17 @@ mod tests {
         // If a CFWS precedes an atom, any comments are retained, but
         // trailing white space is removed.
         c("\r\n foobar  \r\n ",
-          Some(vec![Component::Text("foobar".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Text("foobar".into()),
+              Component::WS,
+          ]));
         c("  \r\n foobar  ",
-          Some(vec![Component::Text("foobar".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Text("foobar".into()),
+              Component::WS,
+          ]));
 
         c("(some comment)foobar",
           Some(vec![
@@ -500,11 +515,13 @@ mod tests {
         c("(some comment) foobar",
           Some(vec![
               Component::Comment("some comment".into()),
+              Component::WS,
               Component::Text("foobar".into())
           ]));
         c("(so\r\n m   \r\n e co\r\n   mme  \r\n   nt\r\n ) \r\n    foobar",
           Some(vec![
               Component::Comment("so m e co mme nt ".into()),
+              Component::WS,
               Component::Text("foobar".into())
           ]));
 
@@ -525,8 +542,9 @@ mod tests {
               Component::Comment("b".into()),
               Component::WS,
               Component::Comment("c".into()),
-              // Whitespace between a comment and an atom is elided.
+              Component::WS,
               Component::Text("foobar".into()),
+              Component::WS,
               Component::Comment("d".into()),
               Component::Comment("e".into()),
               Component::WS,
@@ -685,37 +703,55 @@ mod tests {
         // Internal space is not allowed.
         c("foo bar", None);
 
-        // But lead and trailing space is okay (and it is removed).
+        // But leading and trailing space is okay.
         c(" f",
-          Some(vec![Component::Text("f".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Text("f".into()),
+          ]));
         c("  f",
-          Some(vec![Component::Text("f".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Text("f".into()),
+          ]));
         c("f ",
-          Some(vec![Component::Text("f".into())]));
+          Some(vec![
+              Component::Text("f".into()),
+              Component::WS,
+          ]));
         c("f  ",
-          Some(vec![Component::Text("f".into())]));
+          Some(vec![
+              Component::Text("f".into()),
+              Component::WS,
+          ]));
 
         // Comments are also okay.
         c("(comment) f",
           Some(vec![
               Component::Comment("comment".into()),
+              Component::WS,
               Component::Text("f".into()),
           ]));
         c(" (comment) f",
           Some(vec![
               Component::WS,
               Component::Comment("comment".into()),
+              Component::WS,
               Component::Text("f".into()),
           ]));
         c(" f (comment) ",
           Some(vec![
+              Component::WS,
               Component::Text("f".into()),
+              Component::WS,
               Component::Comment("comment".into()),
               Component::WS,
           ]));
         c(" f (comment)",
           Some(vec![
+              Component::WS,
               Component::Text("f".into()),
+              Component::WS,
               Component::Comment("comment".into()),
           ]));
     }
@@ -782,21 +818,34 @@ mod tests {
         // White space is ignored at the beginning and ending of the
         // local part, but not in the middle.
         c("<  \r\n   foo.bar@x>",
-          Some(vec![Component::Address("foo.bar@x".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Address("foo.bar@x".into()),
+          ]));
         c("<  \r\n   foo.bar \r\n @x>",
-          Some(vec![Component::Address("foo.bar@x".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Address("foo.bar@x".into())
+          ]));
         c("<  (quuz) \r\n   foo.bar@x>",
           Some(vec![
               Component::WS,
               Component::Comment("quuz".into()),
+              Component::WS,
               Component::Address("foo.bar@x".into()),
           ]));
         c("<  \r\n   foo.bar \r\n @x>",
-          Some(vec![Component::Address("foo.bar@x".into())]));
+          Some(vec![
+              Component::WS,
+              Component::Address("foo.bar@x".into()),
+          ]));
         c("<f  \r\n   oo.bar@x>", None);
 
         c("<foo.bar@x \r\n >",
-          Some(vec![Component::Address("foo.bar@x".into())]));
+          Some(vec![
+              Component::Address("foo.bar@x".into()),
+              Component::WS,
+          ]));
         c("<f  \r\n   oo.bar@x \r\n y>", None);
 
         c("  <foo.bar@x>  ",
@@ -811,7 +860,10 @@ mod tests {
           Some(vec![
               Component::WS,
               Component::Comment("Hello!".into()),
-              Component::Address("foo.bar@x".into())]));
+              Component::WS,
+              Component::Address("foo.bar@x".into()),
+              Component::WS,
+          ]));
         // Comments in the local part are always moved left.
         c("< (Hello!) foo.bar (bye?) \r\n @x \r\n >",
           Some(vec![
@@ -819,17 +871,24 @@ mod tests {
               Component::Comment("Hello!".into()),
               Component::WS,
               Component::Comment("bye?".into()),
+              Component::WS,
               Component::Address("foo.bar@x".into()),
+              Component::WS,
           ]));
         c("< (Hello!) foo.bar@x \r\n >",
           Some(vec![
               Component::WS,
               Component::Comment("Hello!".into()),
-              Component::Address("foo.bar@x".into())]));
+              Component::WS,
+              Component::Address("foo.bar@x".into()),
+              Component::WS,
+          ]));
         // Comments in the domain part are always moved right.
         c("< x@  (Hello!) foo.bar (bye?) >",
           Some(vec![
+              Component::WS,
               Component::Address("x@foo.bar".into()),
+              Component::WS,
               Component::Comment("Hello!".into()),
               Component::WS,
               Component::Comment("bye?".into()),
@@ -841,7 +900,10 @@ mod tests {
           Some(vec![
               Component::WS,
               Component::Comment("Hello!".into()),
-              Component::Address("f oo.bar@x".into())]));
+              Component::WS,
+              Component::Address("f oo.bar@x".into()),
+              Component::WS,
+          ]));
     }
 
     #[test]
diff --git a/rfc2822/src/roundtrip.rs b/rfc2822/src/roundtrip.rs
new file mode 100644
index 00000000..b9ecfea5
--- /dev/null
+++ b/rfc2822/src/roundtrip.rs
@@ -0,0 +1,1081 @@
+use std::cmp;
+
+use lalrpop_util::ParseError;
+use quickcheck::{Arbitrary, Gen};
+use rand::Rng;
+use lexer;
+use grammar;
+use component::{Component, components_merge};
+
+// We put each type of token in its own struct, which contains exactly
+// one element, a String (e.g., 'Foo(String)').
+macro_rules! token {
+    ( $name:ident, $g:ident, $arbitrary:block ) => {
+        #[derive(Debug, Clone)]
+        #[allow(non_camel_case_types)]
+        struct $name(String);
+
+        impl $name {
+            fn to_string(self) -> String {
+                self.0.to_string()
+            }
+        }
+
+        impl Arbitrary for $name {
+            fn arbitrary<G: Gen>(g: &mut G) -> Self {
+                let $g = g;
+                // eprintln!("{} -> generating...", stringify!($name));
+                let r = $arbitrary;
+                // eprintln!("{} <- {:?}", stringify!($name), r);
+                $name(r.to_string())
+            }
+        }
+    };
+    ( $name:ident, $g:ident, $arbitrary:block, $self:ident, $shrink:block ) => {
+        #[derive(Debug, Clone)]
+        #[allow(non_camel_case_types)]
+        struct $name(String);
+
+        impl $name {
+            fn to_string(self) -> String {
+                self.0.to_string()
+            }
+        }
+
+        impl Arbitrary for $name {
+            fn arbitrary<G: Gen>(g: &mut G) -> Self {
+                let $g = g;
+                // eprintln!("{} -> generating...", stringify!($name));
+                let r = $arbitrary;
+                // eprintln!("{} <- {:?}", stringify!($name), r);
+                $name(r.to_string())
+            }
+
+            fn shrink(&$self) -> Box<Iterator<Item=Self>> {
+                $shrink
+            }
+        }
+    };
+}
+
+trait Production {
+    fn components(&self) -> Vec<Component>;
+    fn input(&self) -> String;
+
+    fn inner(&self) -> (Vec<Component>, String) {
+        (self.components(), self.input())
+    }
+
+    fn to_input(&self) -> Input {
+        Input::from(self.components(), self.input())
+    }
+}
+
+// Input (s) and its expected parse (c).
+#[derive(Debug, Clone)]
+struct Input {
+    c: Vec<Component>,
+    s: String,
+}
+
+impl Production for Input {
+    fn components(&self) -> Vec<Component> {
+        self.c.clone()
+    }
+
+    fn input(&self) -> String {
+        self.s.clone()
+    }
+}
+
+impl Input {
+    fn new() -> Self {
+        Input {
+            c: vec![],
+            s: String::new(),
+        }
+    }
+
+    fn from<S: AsRef<str>>(c: Vec<Component>, s: S) -> Self {
+        Input {
+            c: c,
+            s: s.as_ref().to_string(),
+        }
+    }
+
+    fn push<S: AsRef<str>>(&mut self, c: Component, s: S) {
+        self.c.push(c);
+        self.s.push_str(s.as_ref());
+    }
+
+    fn append<S: AsRef<str>>(&mut self, mut c: Vec<Component>, s: S) {
+        self.c.append(&mut c);
+        self.s.push_str(s.as_ref());
+    }
+
+    fn concat<P: Production>(&mut self, other: P) {
+        self.c.append(&mut other.components());
+        self.s.push_str(&other.input()[..]);
+    }
+
+    fn push_input<S: AsRef<str>>(&mut self, s: S) {
+        self.s.push_str(s.as_ref())
+    }
+}
+
+macro_rules! production {
+    ( $name:ident, $g:ident, $arbitrary:block ) => {
+        #[derive(Debug, Clone)]
+        struct $name(Input);
+
+        impl Production for $name {
+            fn components(&self) -> Vec<Component> {
+                self.0.components()
+            }
+
+            fn input(&self) -> String {
+                self.0.input()
+            }
+        }
+
+        impl Arbitrary for $name {
+            fn arbitrary<G: Gen>(g: &mut G) -> Self {
+                let $g = g;
+                eprintln!("{} -> generating...", stringify!($name));
+                let r = $arbitrary;
+                eprintln!("{} <- {:?}", stringify!($name), r);
+                $name(r)
+            }
+        }
+    };
+}
+
+macro_rules! parser_quickcheck {
+    ( $type:ident, $parser:ident ) => {
+        quickcheck! {
+            fn $parser(t: $type) -> bool {
+                let s = t.clone().input();
+                let lexer = lexer::Lexer::new(s.as_ref());
+
+                match grammar::$parser::new().parse(lexer) {
+                    Ok(components) => {
+                        let got = components;
+                        let expected = t.components();
+
+                        if got == expected {
+                            true
+                        } else {
+                            eprintln!("     Got: {:?}\nExpected: {:?}",
+                                      got, expected);
+                            for (i, (got, expected))
+                                in got.iter().zip(expected).enumerate()
+                            {
+                                if *got != expected {
+                                    eprintln!("First difference at offset {}: \
+                                               got: {:?}; expected: {:?}",
+                                              i, got, expected);
+                                    break;
+                                }
+                            }
+                            false
+                        }
+                    }
+                    Err(err) => {
+                        eprintln!("Parsing: {:?}: {:?}", t, err);
+                        if let ParseError::UnrecognizedToken {
+                            token: Some((start, _, end)), ..
+                        } = err
+                        {
+                            eprintln!("Context:");
+                            // XXX: This won't work with multi-byte
+                            // characters.  But at that point we've
+                            // already printed the error message.
+                            for i in (cmp::max(5, start) - 5)
+                                ..cmp::min(end + 5, s.len() - 1)
+                            {
+                                eprintln!("{} {}: {:?}",
+                                          if i == start { "*" } else { " " },
+                                          i, &s[i..i+1]);
+                            }
+                        }
+                        false
+                    },
+                }
+            }
+        }
+    };
+}
+
+// ' ' or tab.
+token!(WSP, g, {
+    lazy_static! {
+        static ref WSP_CHARS : Vec<char> = vec![ ' ', '\t' ];
+    }
+
+    WSP_CHARS[g.gen_range(0, WSP_CHARS.len())]
+});
+
+// NO-WS-CTL = %d1-8 /          ; US-ASCII control characters
+//             %d11 /    0xB    ;  that do not include the
+//             %d12 /    0xC    ;  carriage return, line feed,
+//             %d14-31 / 0x
author	Neal H. Walfield <neal@pep.foundation>	2019-04-29 17:58:46 +0200
committer	Neal H. Walfield <neal@pep.foundation>	2019-04-29 18:06:33 +0200
commit	d6eb647ef27bff222bb1bc04b8a7307a864a63cc (patch)
tree	1cf2d51da320c0724adb814ea4265dcc7f10fda7 /rfc2822
parent	1fa018acc90194e1cd1ddf2bd0c6706000a3bc9e (diff)