summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthias Beyer <mail@beyermatthias.de>2019-12-23 12:38:50 +0100
committerMatthias Beyer <mail@beyermatthias.de>2019-12-23 13:37:03 +0100
commit8e7978ca744d0ba02bd28da20caf1a9e4e979968 (patch)
treee759b9da8dba84622d16f6ad1ce7b85911c523e2
parenta41ed49c8e4a960319db5212da437ba25e1651c0 (diff)
Import mailparse code
As the "mailparse" code is licensed as 0BSD, I think I can import this code here without any further arrangements. (IANAL) All credit up to here goes to the author of the "mailparse" crate, of course. The code was a bit restructured into more modules. Signed-off-by: Matthias Beyer <mail@beyermatthias.de>
-rw-r--r--parser/Cargo.toml12
-rw-r--r--parser/src/addrparse.rs624
-rw-r--r--parser/src/body.rs153
-rw-r--r--parser/src/dateparse.rs220
-rw-r--r--parser/src/error.rs70
-rw-r--r--parser/src/lib.rs25
-rw-r--r--parser/src/parser.rs1325
-rw-r--r--parser/src/util.rs47
8 files changed, 2469 insertions, 7 deletions
diff --git a/parser/Cargo.toml b/parser/Cargo.toml
index 3153989..b2366c6 100644
--- a/parser/Cargo.toml
+++ b/parser/Cargo.toml
@@ -15,3 +15,15 @@ autoexamples = true
[dependencies]
+mail-core = { version = "0.6.2", features = ["serde-impl"] }
+mail-headers = { version = "0.6.6", features = ["serde-impl"] }
+mail-internals = "0.2.3"
+
+failure = "0.1"
+vec1 = { version = "1.3.0", features = ["serde"]}
+serde = { version = "1", features = ["derive"] }
+toml = "0.4"
+base64 = "0.11"
+quoted_printable = "0.4"
+charset = "0.1"
+
diff --git a/parser/src/addrparse.rs b/parser/src/addrparse.rs
new file mode 100644
index 0000000..f5da46a
--- /dev/null
+++ b/parser/src/addrparse.rs
@@ -0,0 +1,624 @@
+use std::fmt;
+
+/// A representation of a single mailbox. Each mailbox has
+/// a routing address `addr` and an optional display name.
+#[derive(Clone, Debug, PartialEq)]
+pub struct SingleInfo {
+    /// Human-readable name attached to the mailbox, if one was given.
+    pub display_name: Option<String>,
+    /// The routing address, e.g. `foo@bar.com`.
+    pub addr: String,
+}
+
+impl SingleInfo {
+    /// Builds a mailbox from an optional display name and a routing address.
+    fn new(name: Option<String>, addr: String) -> Self {
+        SingleInfo {
+            display_name: name,
+            addr,
+        }
+    }
+}
+
+impl fmt::Display for SingleInfo {
+    /// Writes `"name" <addr>` when a display name is present and just `addr`
+    /// otherwise.
+    ///
+    /// The display name is emitted as a quoted string, so both `\` and `"`
+    /// inside it are backslash-escaped. `addrparse` treats a backslash inside
+    /// a quoted name as escaping the following character, so escaping only
+    /// the quotes would silently drop backslashes on a round trip.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match &self.display_name {
+            Some(name) => {
+                // Backslashes first; otherwise the backslashes added for the
+                // quotes would be escaped a second time.
+                let escaped = name.replace('\\', r"\\").replace('"', r#"\""#);
+                write!(f, r#""{}" <{}>"#, escaped, self.addr)
+            }
+            None => write!(f, "{}", self.addr),
+        }
+    }
+}
+
+/// A representation of a group address. It has a name and
+/// a list of mailboxes.
+#[derive(Clone, Debug, PartialEq)]
+pub struct GroupInfo {
+    /// Name of the group.
+    pub group_name: String,
+    /// Mailboxes that belong to the group; may be empty.
+    pub addrs: Vec<SingleInfo>,
+}
+
+impl GroupInfo {
+    /// Builds a group from its name and member mailboxes.
+    fn new(name: String, addrs: Vec<SingleInfo>) -> Self {
+        GroupInfo {
+            group_name: name,
+            addrs,
+        }
+    }
+}
+
+impl fmt::Display for GroupInfo {
+    /// Writes `"name": member, member;` (or `"name":;` for an empty group).
+    ///
+    /// The group name is emitted as a quoted string, so both `\` and `"`
+    /// inside it are backslash-escaped; the parser in this file treats a
+    /// backslash inside a quoted name as escaping the following character.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Backslashes first; otherwise the backslashes added for the quotes
+        // would be escaped a second time.
+        let escaped = self.group_name.replace('\\', r"\\").replace('"', r#"\""#);
+        write!(f, r#""{}":"#, escaped)?;
+        for (i, addr) in self.addrs.iter().enumerate() {
+            // A space after the colon, a comma between members.
+            f.write_str(if i == 0 { " " } else { ", " })?;
+            addr.fmt(f)?;
+        }
+        f.write_str(";")
+    }
+}
+
+/// An abstraction over the two different kinds of top-level addresses allowed
+/// in email headers. Group addresses have a name and a list of mailboxes. Single
+/// addresses are just a mailbox. Each mailbox consists of what you would consider
+/// an email address (e.g. foo@bar.com) and optionally a display name ("Foo Bar").
+/// Groups are represented in email headers with colons and semicolons, e.g.
+/// To: my-peeps: foo@peeps.org, bar@peeps.org;
+#[derive(Clone, Debug, PartialEq)]
+pub enum MailAddr {
+ /// A named group together with its (possibly empty) list of mailboxes.
+ Group(GroupInfo),
+ /// A single mailbox.
+ Single(SingleInfo),
+}
+
+/// States of the character-driven state machine in `addrparse_inner`.
+#[derive(Debug)]
+enum AddrParseState {
+ /// Between addresses: skipping whitespace until the next address starts.
+ Initial,
+ /// Inside a double-quoted name, after the opening `"`.
+ QuotedName,
+ /// Directly after a backslash inside a quoted name; the next character is taken literally.
+ EscapedChar,
+ /// After the closing `"` of a quoted name.
+ AfterQuotedName,
+ /// Inside `<...>`, collecting the address.
+ BracketedAddr,
+ /// After the closing `>` of a bracketed address.
+ AfterBracketedAddr,
+ /// Collecting unquoted text; it becomes a bare address, a display name
+ /// (if `<` follows) or a group name (if `:` follows).
+ Unquoted,
+ /// Inside a parenthesised comment following a bracketed address; its content is discarded.
+ TrailerComment,
+}
+
+/// A simple wrapper around `Vec<MailAddr>`. This is primarily here so we can
+/// implement the Display trait on it, and allow user code to easily convert
+/// the return value from `addrparse` back into a string. However there are some
+/// additional utility functions on this wrapper as well.
+#[derive(Clone, Debug, PartialEq)]
+pub struct MailAddrList(Vec<MailAddr>);
+
+impl std::ops::Deref for MailAddrList {
+    type Target = Vec<MailAddr>;
+
+    /// Gives read access to the wrapped vector.
+    fn deref(&self) -> &Vec<MailAddr> {
+        &self.0
+    }
+}
+
+impl std::ops::DerefMut for MailAddrList {
+    /// Gives write access to the wrapped vector.
+    fn deref_mut(&mut self) -> &mut Vec<MailAddr> {
+        &mut self.0
+    }
+}
+
+impl fmt::Display for MailAddrList {
+    /// Writes all addresses in order. Entries are separated by `", "`, except
+    /// directly after a group, which already ends in `;` and is followed by a
+    /// single space only.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Separator to emit before the next entry; empty before the first.
+        let mut separator = "";
+        for addr in self.iter() {
+            f.write_str(separator)?;
+            separator = match addr {
+                MailAddr::Group(g) => {
+                    g.fmt(f)?;
+                    " "
+                }
+                MailAddr::Single(s) => {
+                    s.fmt(f)?;
+                    ", "
+                }
+            };
+        }
+        Ok(())
+    }
+}
+
+impl From<Vec<MailAddr>> for MailAddrList {
+    /// Wraps an existing vector of addresses without copying it.
+    fn from(addrs: Vec<MailAddr>) -> Self {
+        MailAddrList(addrs)
+    }
+}
+
+impl MailAddrList {
+    /// Count the number of `SingleInfo` instances in this list of addresses.
+    /// Mailboxes inside groups are counted; the groups themselves are not.
+    pub fn count_addrs(&self) -> usize {
+        self.iter()
+            .map(|elem| match elem {
+                MailAddr::Single(_) => 1,
+                MailAddr::Group(g) => g.addrs.len(),
+            })
+            .sum()
+    }
+
+    /// Convenience function to check if this list of addresses contains exactly
+    /// one entry and that entry is a `SingleInfo`, and if so, to return it.
+    /// In every other case (empty list, several entries, or a single group)
+    /// this function returns None.
+    pub fn extract_single_info(self) -> Option<SingleInfo> {
+        // `self` is consumed, so the mailbox can be moved out instead of cloned.
+        let mut addrs = self.0.into_iter();
+        match (addrs.next(), addrs.next()) {
+            (Some(MailAddr::Single(info)), None) => Some(info),
+            _ => None,
+        }
+    }
+}
+
+/// Parse the value of an address header (to/from/cc/bcc) into a structured
+/// list of mailboxes and groups. The most common real-world formattings of
+/// these fields are handled.
+///
+/// # Errors
+/// Returns a static error message when the input cannot be parsed.
+///
+/// # Examples
+/// ```
+/// use mailparse::{addrparse, MailAddr, SingleInfo};
+/// match &addrparse("John Doe <john@doe.com>").unwrap()[0] {
+///     MailAddr::Single(info) => {
+///         assert_eq!(info.display_name, Some("John Doe".to_string()));
+///         assert_eq!(info.addr, "john@doe.com".to_string());
+///     }
+///     _ => panic!()
+/// };
+/// ```
+pub fn addrparse(addrs: &str) -> Result<MailAddrList, &'static str> {
+    // The top-level list is parsed outside of any group.
+    addrparse_inner(&mut addrs.chars(), false)
+}
+
+/// Unwraps the members of a parsed group into plain mailboxes.
+fn group_members(list: MailAddrList) -> Vec<SingleInfo> {
+    list.0
+        .into_iter()
+        .map(|addr| match addr {
+            MailAddr::Single(s) => s,
+            // The in-group parse rejects `:` with an error, so it never
+            // produces a group entry.
+            MailAddr::Group(_) => panic!("Unexpected nested group encountered"),
+        })
+        .collect()
+}
+
+/// Parses addresses from `it` until the input is exhausted or, when
+/// `in_group` is set, until the `;` that terminates the group.
+///
+/// The parser is a character-driven state machine (see `AddrParseState`).
+/// Top-level callers pass `in_group == false`. When a name followed by `:` is
+/// found, the function calls itself once with `in_group == true` on the same
+/// iterator to collect the group members. Groups may not nest.
+///
+/// # Errors
+/// Returns a static message when a `;` starts an address outside of a group,
+/// a `:` appears inside a group, an unexpected character follows a bracketed
+/// address, the input ends inside a group's member list, or the input ends
+/// in the middle of a quoted name, bracketed address or trailing comment.
+fn addrparse_inner(it: &mut std::str::Chars, in_group: bool) -> Result<MailAddrList, &'static str> {
+    let mut result = vec![];
+    let mut state = AddrParseState::Initial;
+
+    // No (remaining) input at all yields an empty list, even inside a group.
+    let mut c = match it.next() {
+        None => return Ok(MailAddrList(vec![])),
+        Some(v) => v,
+    };
+
+    // Display or group name collected so far.
+    let mut name: Option<String> = None;
+    // Address (or, in `Unquoted`, not-yet-classified text) collected so far.
+    let mut addr: Option<String> = None;
+    // Whitespace seen after a closing quote. It is only added to the name if
+    // more name text follows.
+    let mut post_quote_ws: Option<String> = None;
+
+    loop {
+        match state {
+            AddrParseState::Initial => {
+                if c.is_whitespace() {
+                    // continue in same state
+                } else if c == '"' {
+                    state = AddrParseState::QuotedName;
+                    name = Some(String::new());
+                } else if c == '<' {
+                    state = AddrParseState::BracketedAddr;
+                    addr = Some(String::new());
+                } else if c == ';' {
+                    if !in_group {
+                        return Err("Unexpected group terminator found in initial list");
+                    }
+                    return Ok(MailAddrList(result));
+                } else {
+                    state = AddrParseState::Unquoted;
+                    addr = Some(c.to_string());
+                }
+            }
+            AddrParseState::QuotedName => {
+                if c == '\\' {
+                    state = AddrParseState::EscapedChar;
+                } else if c == '"' {
+                    state = AddrParseState::AfterQuotedName;
+                } else {
+                    name.as_mut().unwrap().push(c);
+                }
+            }
+            AddrParseState::EscapedChar => {
+                // Whatever follows the backslash is taken literally.
+                state = AddrParseState::QuotedName;
+                name.as_mut().unwrap().push(c);
+            }
+            AddrParseState::AfterQuotedName => {
+                if c.is_whitespace() {
+                    post_quote_ws.get_or_insert_with(String::new).push(c);
+                } else if c == '<' {
+                    // The name is complete. Drop the pending whitespace so it
+                    // cannot leak into the name of a later address.
+                    post_quote_ws = None;
+                    state = AddrParseState::BracketedAddr;
+                    addr = Some(String::new());
+                } else if c == ':' {
+                    if in_group {
+                        return Err("Found unexpected nested group");
+                    }
+                    // Same reasoning as for `<`: the group name is complete.
+                    post_quote_ws = None;
+                    let group_addrs = addrparse_inner(it, true)?;
+                    state = AddrParseState::Initial;
+                    result.push(MailAddr::Group(GroupInfo::new(
+                        name.take().unwrap(),
+                        group_members(group_addrs),
+                    )));
+                } else {
+                    // I think technically not valid, but this occurs in real-world corpus, so
+                    // handle gracefully: the name continues, so the whitespace
+                    // between its parts belongs to it.
+                    if let Some(ws) = post_quote_ws.take() {
+                        name.as_mut().unwrap().push_str(&ws);
+                    }
+                    if c == '"' {
+                        state = AddrParseState::QuotedName;
+                    } else {
+                        name.as_mut().unwrap().push(c);
+                    }
+                }
+            }
+            AddrParseState::BracketedAddr => {
+                if c == '>' {
+                    state = AddrParseState::AfterBracketedAddr;
+                    result.push(MailAddr::Single(SingleInfo::new(
+                        name.take(),
+                        addr.take().unwrap(),
+                    )));
+                } else {
+                    addr.as_mut().unwrap().push(c);
+                }
+            }
+            AddrParseState::AfterBracketedAddr => {
+                if c.is_whitespace() {
+                    // continue in same state
+                } else if c == ',' {
+                    state = AddrParseState::Initial;
+                } else if c == ';' {
+                    if in_group {
+                        return Ok(MailAddrList(result));
+                    }
+                    // Technically not valid, but a similar case occurs in real-world corpus, so handle it gracefully
+                    state = AddrParseState::Initial;
+                } else if c == '(' {
+                    state = AddrParseState::TrailerComment;
+                } else {
+                    return Err("Unexpected char found after bracketed address");
+                }
+            }
+            AddrParseState::Unquoted => {
+                if c == '<' {
+                    // What was collected so far turns out to be a display name.
+                    state = AddrParseState::BracketedAddr;
+                    name = addr.take().map(|s| s.trim_end().to_owned());
+                    addr = Some(String::new());
+                } else if c == ',' {
+                    state = AddrParseState::Initial;
+                    result.push(MailAddr::Single(SingleInfo::new(
+                        None,
+                        addr.take().unwrap().trim_end().to_owned(),
+                    )));
+                } else if c == ';' {
+                    result.push(MailAddr::Single(SingleInfo::new(
+                        None,
+                        addr.take().unwrap().trim_end().to_owned(),
+                    )));
+                    if in_group {
+                        return Ok(MailAddrList(result));
+                    }
+                    // Technically not valid, but occurs in real-world corpus, so handle it gracefully
+                    state = AddrParseState::Initial;
+                } else if c == ':' {
+                    if in_group {
+                        return Err("Found unexpected nested group");
+                    }
+                    // What was collected so far turns out to be a group name.
+                    let group_addrs = addrparse_inner(it, true)?;
+                    state = AddrParseState::Initial;
+                    result.push(MailAddr::Group(GroupInfo::new(
+                        addr.take().unwrap().trim_end().to_owned(),
+                        group_members(group_addrs),
+                    )));
+                } else {
+                    addr.as_mut().unwrap().push(c);
+                }
+            }
+            AddrParseState::TrailerComment => {
+                // Comment text is discarded; only the closing paren matters.
+                if c == ')' {
+                    state = AddrParseState::AfterBracketedAddr;
+                }
+            }
+        }
+
+        c = match it.next() {
+            None => break,
+            Some(v) => v,
+        };
+    }
+
+    // Inside a group the only successful exit is the `;` handled above.
+    if in_group {
+        return Err("Found unterminated group address");
+    }
+
+    match state {
+        AddrParseState::QuotedName
+        | AddrParseState::EscapedChar
+        | AddrParseState::AfterQuotedName
+        | AddrParseState::BracketedAddr
+        | AddrParseState::TrailerComment => Err("Address string unexpected terminated"),
+        AddrParseState::Unquoted => {
+            // The input ended while collecting a bare address; flush it.
+            result.push(MailAddr::Single(SingleInfo::new(
+                None,
+                addr.take().unwrap().trim_end().to_owned(),
+            )));
+            Ok(MailAddrList(result))
+        }
+        AddrParseState::Initial | AddrParseState::AfterBracketedAddr => Ok(MailAddrList(result)),
+    }
+}
+
+// Unit tests for address parsing (`addrparse`) and for the `Display`
+// implementations and helper methods of the address types.
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ // Single mailboxes: bare address, unquoted name and quoted name.
+ #[test]
+ fn parse_basic() {
+ assert_eq!(
+ addrparse("foo bar <foo@bar.com>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()))])
+ );
+ assert_eq!(
+ addrparse("\"foo bar\" <foo@bar.com>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("foo bar".to_string()), "foo@bar.com".to_string()))])
+ );
+ assert_eq!(
+ addrparse("foo@bar.com ").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(None, "foo@bar.com".to_string()))])
+ );
+ assert_eq!(
+ addrparse("foo <bar>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("foo".to_string()), "bar".to_string()))])
+ );
+ assert_eq!(
+ addrparse("\"foo\" <bar>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("foo".to_string()), "bar".to_string()))])
+ );
+ assert_eq!(
+ addrparse("\"foo \" <bar>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("foo ".to_string()), "bar".to_string()))])
+ );
+ }
+
+ // Backslash escapes are only interpreted inside quoted names; in an
+ // unquoted name the backslashes are kept verbatim.
+ #[test]
+ fn parse_backslashes() {
+ assert_eq!(
+ addrparse(r#" "First \"nick\" Last" <user@host.tld> "#).unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("First \"nick\" Last".to_string()), "user@host.tld".to_string()))])
+ );
+ assert_eq!(
+ addrparse(r#" First \"nick\" Last <user@host.tld> "#).unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("First \\\"nick\\\" Last".to_string()), "user@host.tld".to_string()))])
+ );
+ }
+
+ // Comma-separated list of mailboxes.
+ #[test]
+ fn parse_multi() {
+ assert_eq!(
+ addrparse("foo <bar>, joe, baz <quux>").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Single(SingleInfo::new(Some("foo".to_string()), "bar".to_string())),
+ MailAddr::Single(SingleInfo::new(None, "joe".to_string())),
+ MailAddr::Single(SingleInfo::new(Some("baz".to_string()), "quux".to_string())),
+ ])
+ );
+ }
+
+ // Groups without members, with and without surrounding whitespace.
+ #[test]
+ fn parse_empty_group() {
+ assert_eq!(
+ addrparse("empty-group:;").unwrap(),
+ MailAddrList(vec![MailAddr::Group(GroupInfo::new("empty-group".to_string(), vec![]))])
+ );
+ assert_eq!(
+ addrparse(" empty-group : ; ").unwrap(),
+ MailAddrList(vec![MailAddr::Group(GroupInfo::new("empty-group".to_string(), vec![]))])
+ );
+ }
+
+ // Groups with one and with two members.
+ #[test]
+ fn parse_simple_group() {
+ assert_eq!(
+ addrparse("bar-group: foo <foo@bar.com>;").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ ]))
+ ])
+ );
+ assert_eq!(
+ addrparse("bar-group: foo <foo@bar.com>, baz@bar.com;").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ SingleInfo::new(None, "baz@bar.com".to_string()),
+ ]))
+ ])
+ );
+ }
+
+ // Lists that mix groups and single mailboxes in various orders.
+ #[test]
+ fn parse_mixed() {
+ assert_eq!(
+ addrparse("joe@bloe.com, bar-group: foo <foo@bar.com>;").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string())),
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ ])),
+ ])
+ );
+ assert_eq!(
+ addrparse("bar-group: foo <foo@bar.com>; joe@bloe.com").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string())),
+ ])
+ );
+ assert_eq!(
+ addrparse("flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string())),
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string())),
+ ])
+ );
+ assert_eq!(
+ addrparse("first-group:; flim@flam.com, bar-group: foo <foo@bar.com>; joe@bloe.com, final-group: zip, zap, \"Zaphod\" <zaphod@beeblebrox>;").unwrap(),
+ MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("first-group".to_string(), vec![])),
+ MailAddr::Single(SingleInfo::new(None, "flim@flam.com".to_string())),
+ MailAddr::Group(GroupInfo::new("bar-group".to_string(), vec![
+ SingleInfo::new(Some("foo".to_string()), "foo@bar.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(None, "joe@bloe.com".to_string())),
+ MailAddr::Group(GroupInfo::new("final-group".to_string(), vec![
+ SingleInfo::new(None, "zip".to_string()),
+ SingleInfo::new(None, "zap".to_string()),
+ SingleInfo::new(Some("Zaphod".to_string()), "zaphod@beeblebrox".to_string()),
+ ])),
+ ])
+ );
+ }
+
+ // Inputs seen in real headers that the parser accepts leniently.
+ #[test]
+ fn real_world_examples() {
+ // taken from a real "From" header. This might not be valid according to the RFC
+ // but obviously made it through the internet so we should at least not crash.
+ assert_eq!(
+ addrparse("\"The Foo of Bar\" Course Staff <foo-no-reply@bar.edx.org>").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("The Foo of Bar Course Staff".to_string()), "foo-no-reply@bar.edx.org".to_string()))])
+ );
+
+ // This one has a comment tacked on to the end. Adding proper support for comments seems
+ // complicated so I just added trailer comment support.
+ assert_eq!(
+ addrparse("John Doe <support@github.com> (GitHub Staff)").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(Some("John Doe".to_string()), "support@github.com".to_string()))])
+ );
+
+ // Taken from a real world "To" header. It was spam, but still...
+ assert_eq!(
+ addrparse("foo@bar.com;").unwrap(),
+ MailAddrList(vec![MailAddr::Single(SingleInfo::new(None, "foo@bar.com".to_string()))])
+ );
+ }
+
+ // `SingleInfo` formatting, and parsing the formatted text back.
+ #[test]
+ fn stringify_single() {
+ let tc = SingleInfo::new(Some("John Doe".to_string()), "john@doe.com".to_string());
+ assert_eq!(tc.to_string(), r#""John Doe" <john@doe.com>"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]));
+
+ let tc = SingleInfo::new(Some(r#"John "Jack" Doe"#.to_string()), "john@doe.com".to_string());
+ assert_eq!(tc.to_string(), r#""John \"Jack\" Doe" <john@doe.com>"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]));
+
+ let tc = SingleInfo::new(None, "foo@bar.com".to_string());
+ assert_eq!(tc.to_string(), r#"foo@bar.com"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Single(tc)]));
+ }
+
+ // `GroupInfo` formatting, and parsing the formatted text back.
+ #[test]
+ fn stringify_group() {
+ let tc = GroupInfo::new("group-name".to_string(), vec![
+ SingleInfo::new(None, "foo@bar.com".to_string()),
+ SingleInfo::new(Some("A".to_string()), "a@b".to_string()),
+ ]);
+ assert_eq!(tc.to_string(), r#""group-name": foo@bar.com, "A" <a@b>;"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]));
+
+ let tc = GroupInfo::new("empty-group".to_string(), vec![]);
+ assert_eq!(tc.to_string(), r#""empty-group":;"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]));
+
+ let tc = GroupInfo::new(r#"group-with"quote"#.to_string(), vec![]);
+ assert_eq!(tc.to_string(), r#""group-with\"quote":;"#);
+ assert_eq!(addrparse(&tc.to_string()).unwrap(), MailAddrList(vec![MailAddr::Group(tc)]));
+ }
+
+ // `MailAddrList` formatting: a group is followed by a space, a mailbox by a comma.
+ #[test]
+ fn stringify_list() {
+ let tc = MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("marvel".to_string(), vec![
+ SingleInfo::new(None, "ironman@marvel.com".to_string()),
+ SingleInfo::new(None, "spiderman@marvel.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string())),
+ MailAddr::Group(GroupInfo::new("dc".to_string(), vec![
+ SingleInfo::new(None, "batman@dc.com".to_string()),
+ SingleInfo::new(None, "superman@dc.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string())),
+ ]);
+ assert_eq!(tc.to_string(),
+ r#""marvel": ironman@marvel.com, spiderman@marvel.com; "b-man" <b@man.com>, "dc": batman@dc.com, superman@dc.com; "d-woman" <d@woman.com>"#);
+ }
+
+ // `count_addrs` and `extract_single_info` on several list shapes.
+ #[test]
+ fn count_addrs() {
+ let tc = MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("marvel".to_string(), vec![
+ SingleInfo::new(None, "ironman@marvel.com".to_string()),
+ SingleInfo::new(None, "spiderman@marvel.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(Some("b-man".to_string()), "b@man.com".to_string())),
+ MailAddr::Group(GroupInfo::new("dc".to_string(), vec![
+ SingleInfo::new(None, "batman@dc.com".to_string()),
+ SingleInfo::new(None, "superman@dc.com".to_string()),
+ ])),
+ MailAddr::Single(SingleInfo::new(Some("d-woman".to_string()), "d@woman.com".to_string())),
+ ]);
+ assert_eq!(tc.count_addrs(), 6);
+ assert_eq!(tc.extract_single_info(), None);
+
+ let tc = MailAddrList(vec![]);
+ assert_eq!(tc.count_addrs(), 0);
+ assert_eq!(tc.extract_single_info(), None);
+
+ let tc = MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("group".to_string(), vec![
+ SingleInfo::new(None, "foo@bar.com".to_string()),
+ ])),
+ ]);
+ assert_eq!(tc.count_addrs(), 1);
+ assert_eq!(tc.extract_single_info(), None);
+
+ let tc = MailAddrList(vec![
+ MailAddr::Single(SingleInfo::new(None, "foo@bar.com".to_string())),
+ ]);
+ assert_eq!(tc.count_addrs(), 1);
+ assert_eq!(tc.extract_single_info(), Some(SingleInfo::new(None, "foo@bar.com".to_string())));
+
+ let tc = MailAddrList(vec![
+ MailAddr::Group(GroupInfo::new("group".to_string(), vec![])),
+ MailAddr::Group(GroupInfo::new("group".to_string(), vec![])),
+ ]);
+ assert_eq!(tc.count_addrs(), 0);
+ assert_eq!(tc.extract_single_info(), None);
+
+ }
+}
diff --git a/parser/src/body.rs b/parser/src/body.rs
new file mode 100644
index 0000000..4a7fc36
--- /dev/null
+++ b/parser/src/body.rs
@@ -0,0 +1,153 @@
+use charset::{decode_ascii, Charset};
+use crate::error::MailParseError;
+use crate::parser::ParsedContentType;
+
+/// The body of an email (or of one of its subparts), tagged with the
+/// Content-Transfer-Encoding it was declared with.
+pub enum Body<'a> {
+    /// A body with 'base64' Content-Transfer-Encoding.
+    Base64(EncodedBody<'a>),
+    /// A body with 'quoted-printable' Content-Transfer-Encoding.
+    QuotedPrintable(EncodedBody<'a>),
+    /// A body with '7bit' Content-Transfer-Encoding.
+    SevenBit(TextBody<'a>),
+    /// A body with '8bit' Content-Transfer-Encoding.
+    EightBit(TextBody<'a>),
+    /// A body with 'binary' Content-Transfer-Encoding.
+    Binary(BinaryBody<'a>),
+}
+
+impl<'a> Body<'a> {
+    /// Wraps the raw `body` bytes in the variant selected by
+    /// `transfer_encoding`.
+    ///
+    /// The encoding name is compared exactly against `base64`,
+    /// `quoted-printable`, `7bit`, `8bit` and `binary`. A missing or any
+    /// other value is treated like `7bit`.
+    pub fn new(
+        body: &'a [u8],
+        ctype: &'a ParsedContentType,
+        transfer_encoding: &Option<String>,
+    ) -> Body<'a> {
+        match transfer_encoding.as_ref().map(String::as_str) {
+            Some("base64") => Body::Base64(EncodedBody {
+                decoder: decode_base64,
+                body,
+                ctype,
+            }),
+            Some("quoted-printable") => Body::QuotedPrintable(EncodedBody {
+                decoder: decode_quoted_printable,
+                body,
+                ctype,
+            }),
+            Some("7bit") => Body::SevenBit(TextBody { body, ctype }),
+            Some("8bit") => Body::EightBit(TextBody { body, ctype }),
+            Some("binary") => Body::Binary(BinaryBody { body, ctype }),
+            Some(_) | None => Body::get_default(body, ctype),
+        }
+    }
+
+    /// The fallback used when no known transfer encoding is given: a 7bit text body.
+    fn get_default(body: &'a [u8], ctype: &'a ParsedContentType) -> Body<'a> {
+        Body::SevenBit(TextBody { body, ctype })
+    }
+}
+
+/// A message (or subpart) body that is stored in an encoded form, together
+/// with the function that decodes it.
+pub struct EncodedBody<'a> {
+    decoder: fn(&[u8]) -> Result<Vec<u8>, MailParseError>,
+    ctype: &'a ParsedContentType,
+    body: &'a [u8],
+}
+
+impl<'a> EncodedBody<'a> {
+    /// The Content-Type this body was declared with.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        self.ctype
+    }
+
+    /// The body bytes exactly as they appear in the message (or message
+    /// subpart), i.e. still encoded.
+    pub fn get_raw(&self) -> &'a [u8] {
+        self.body
+    }
+
+    /// Runs the stored decoder over the raw body and returns the decoded bytes.
+    pub fn get_decoded(&self) -> Result<Vec<u8>, MailParseError> {
+        let decode = self.decoder;
+        decode(self.get_raw())
+    }
+
+    /// Decodes the body and converts the result into a Rust string using the
+    /// charset from the Content-Type, falling back to ASCII decoding when
+    /// the charset label is not recognized.
+    /// The result is only meaningful if the decoded body is text.
+    pub fn get_decoded_as_string(&self) -> Result<String, MailParseError> {
+        let decoded = self.get_decoded()?;
+        get_body_as_string(&decoded, self.ctype)
+    }
+}
+
+/// A message (or subpart) body that is stored as plain text, without a
+/// transfer encoding that needs decoding.
+pub struct TextBody<'a> {
+    ctype: &'a ParsedContentType,
+    body: &'a [u8],
+}
+
+impl<'a> TextBody<'a> {
+    /// The Content-Type this body was declared with.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        self.ctype
+    }
+
+    /// The body bytes exactly as they appear in the message (or message subpart).
+    pub fn get_raw(&self) -> &'a [u8] {
+        self.body
+    }
+
+    /// Converts the body into a Rust string using the charset from the
+    /// Content-Type, falling back to ASCII decoding when the charset label
+    /// is not recognized.
+    pub fn get_as_string(&self) -> Result<String, MailParseError> {
+        get_body_as_string(self.get_raw(), self.get_content_type())
+    }
+}
+
+/// A message (or subpart) body with binary content. Only the raw bytes are
+/// exposed; no string conversion is offered.
+pub struct BinaryBody<'a> {
+    ctype: &'a ParsedContentType,
+    body: &'a [u8],
+}
+
+impl<'a> BinaryBody<'a> {
+    /// The Content-Type this body was declared with.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        let BinaryBody { ctype, .. } = self;
+        ctype
+    }
+
+    /// The body bytes exactly as they appear in the message (or message subpart).
+    pub fn get_raw(&self) -> &'a [u8] {
+        let BinaryBody { body, .. } = self;
+        body
+    }
+}
+
+/// Decodes a base64 body. ASCII whitespace (such as the line breaks of a
+/// wrapped body) is removed before the data is handed to the decoder.
+fn decode_base64(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+    let mut stripped = Vec::with_capacity(body.len());
+    stripped.extend(
+        body.iter()
+            .copied()
+            .filter(|byte| !byte.is_ascii_whitespace()),
+    );
+    let decoded = base64::decode(&stripped)?;
+    Ok(decoded)
+}
+
+/// Decodes a quoted-printable body using the decoder's `Robust` parse mode.
+fn decode_quoted_printable(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+    let mode = quoted_printable::ParseMode::Robust;
+    let decoded = quoted_printable::decode(body, mode)?;
+    Ok(decoded)
+}
+
+/// Converts `body` into an owned string using the charset named in `ctype`.
+///
+/// If the charset label is not recognized, the bytes are decoded with
+/// `decode_ascii` instead. This function always returns `Ok`.
+fn get_body_as_string(body: &[u8], ctype: &ParsedContentType) -> Result<String, MailParseError> {
+    let text = match Charset::for_label(ctype.charset.as_bytes()) {
+        // Only the decoded text is used; the other tuple elements are ignored.
+        Some(charset) => charset.decode(body).0,
+        None => decode_ascii(body),
+    };
+    Ok(text.into_owned())
+}
diff --git a/parser/src/dateparse.rs b/parser/src/dateparse.rs
new file mode 100644
index 0000000..ad735ef
--- /dev/null
+++ b/parser/src/dateparse.rs
@@ -0,0 +1,220 @@
+/// Tracks which field of the date string `dateparse` expects next.
+/// Parsing starts in `Date` and moves on to `Month` once a day-of-month
+/// number has been read.
+// NOTE(review): the remaining variants presumably follow in declaration
+// order (year, hour, minute, second, timezone) — confirm against the rest
+// of `dateparse`.
+enum DateParseState {
+ Date,
+ Month,
+ Year,
+ Hour,
+ Minute,
+ Second,
+ Timezone,
+}
+
+/// Number of days in the given month of the given year.
+///
+/// `month` is zero-based (0 = January, 11 = December). February takes the
+/// Gregorian leap-year rule into account. Any month outside `0..=11`
+/// yields 0.
+fn days_in_month(month: i64, year: i64) -> i64 {
+    // Divisible by 4, except centuries, unless divisible by 400.
+    let is_leap_year = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
+    match month {
+        1 if is_leap_year => 29,
+        1 => 28,
+        3 | 5 | 8 | 10 => 30,
+        0 | 2 | 4 | 6 | 7 | 9 | 11 => 31,
+        _ => 0,
+    }
+}
+
+/// Number of seconds from 1970-01-01 00:00:00 to midnight at the start of
+/// the given date, ignoring time zones.
+///
+/// `month` is zero-based (it is passed to `days_in_month`), `day` is
+/// one-based (`day - 1` full days are added).
+// NOTE(review): for `year < 1970` the first loop never hits its `break` and
+// the second loop does not run, so the result is the same as for year 2001.
+// Callers are presumably expected to reject such years — confirm.
+fn seconds_to_date(year: i64, month: i64, day: i64) -> i64 {
+ let mut result: i64 = 0;
+ // Whole years from 1970 up to (not including) `year`, at most through 2000.
+ // In this range every year divisible by 4 is a leap year (2000 included).
+ for y in 1970..2001 {
+ if y == year {
+ break;
+ }
+ result += 86400 * 365;
+ if (y % 4) == 0 {
+ result += 86400;
+ }
+ }
+ // Whole years from 2001 up to (not including) `year`, stepping in the
+ // largest possible chunk: 400 years (97 leap days), 100 years (24 leap
+ // days), 4 years (1 leap day) or a single non-leap year.
+ let mut y = 2001;
+ while y < year {
+ if year - y >= 400 {
+ result += (86400 * 365 * 400) + (86400 * 97);
+ y += 400;
+ continue;
+ }
+ if year - y >= 100 {
+ result += (86400 * 365 * 100) + (86400 * 24);
+ y += 100;
+ continue;
+ }
+ if year - y >= 4 {
+ result += (86400 * 365 * 4) + (86400);
+ y += 4;
+ continue;
+ }
+ result += 86400 * 365;
+ y += 1;
+ }
+ // Whole months of the target year that precede `month`.
+ for m in 0..month {
+ result += 86400 * days_in_month(m, year)
+ }
+ // Whole days of the target month that precede `day`.
+ result + 86400 * (day - 1)
+}
+
+/// Convert a date field from an email header into a UNIX epoch timestamp.
+/// This function handles the most common formatting of date fields found in
+/// email headers. It may fail to parse some of the more creative formattings.
+///
+/// # Examples
+/// ```
+/// use mailparse::dateparse;
+/// assert_eq!(dateparse("Sun, 02 Oct 2016 07:06:22 -0700 (PDT)").unwrap(), 1475417182);
+/// ```
+pub fn dateparse(date: &str) -> Result<i64, &'static str> {
+ let mut result = 0;
+ let mut month = 0;
+ let mut day_of_month = 0;
+ let mut state = DateParseState::Date;
+ for tok in date.split(|c| c == ' ' || c == ':') {
+ if tok.is_empty() {
+ continue;
+ }
+ match state {
+ DateParseState::Date => {
+ if let Ok(v) = tok.parse::<u8>() {
+ day_of_month = v;
+ state = DateParseState::Month;
+ };
+ continue;
+ }
+ DateParseState::Month => {
+ month = match tok.to_uppercase().as_str() {
+ "JAN" | "JANUARY" => 0,
+ "FEB" | "FEBRUARY" => 1,
+