melib: update nom dependency from 3.2.0 to 5.1.1

That was hecking exhausting
author: Manos Pitsidianakis <el13635@mail.ntua.gr> 2020-06-06 19:38:20 +0300
committer: Manos Pitsidianakis <el13635@mail.ntua.gr> 2020-06-06 23:19:07 +0300
commit: 6ec249dd7f5c4dbaafa151f7a3be682e8fe0c2a9 (patch)
tree: dbe8084881a0b6dc11c52642812fc14d12d53bda /melib/src/email/parser.rs
parent: db4c40182880582761937a0b8e0dfdc915628b21 (diff)
1 files changed, 925 insertions, 855 deletions
diff --git a/melib/src/email/parser.rs b/melib/src/email/parser.rs
index b0380cfb..e6520f00 100644
--- a/melib/src/email/parser.rs
+++ b/melib/src/email/parser.rs
@@ -1,7 +1,7 @@
 /*
  * meli - parser module
  *
- * Copyright 2017 Manos Pitsidianakis
+ * Copyright 2017 - 2020 Manos Pitsidianakis
  *
  * This file is part of meli.
  *
@@ -18,14 +18,19 @@
  * You should have received a copy of the GNU General Public License
  * along with meli. If not, see <http://www.gnu.org/licenses/>.
  */
-use super::*;
-use data_encoding::BASE64_MIME;
-use encoding::{DecoderTrap, Encoding};
-use nom::{is_hex_digit, le_u8};
-pub(super) use nom::{ErrorKind, IResult, Needed};
 
-use encoding::all::*;
-use std;
+use crate::error::{MeliError, Result, ResultIntoMeliError};
+use nom::{
+    branch::alt,
+    bytes::complete::{is_a, is_not, tag, take_until, take_while},
+    character::is_hex_digit,
+    combinator::peek,
+    error::ErrorKind,
+    multi::{many0, many1, separated_list, separated_nonempty_list},
+    number::complete::le_u8,
+    sequence::{delimited, preceded, separated_pair, terminated},
+    IResult,
+};
 
 macro_rules! is_ctl_or_space {
     ($var:ident) => {
@@ -121,969 +126,1038 @@ impl<'a, P: for<'r> FnMut(&'r u8) -> bool> BytesIterExt for std::slice::Split<'a
     }
 }
 
-fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
-    if input.len() < 3 {
-        IResult::Incomplete(Needed::Size(1))
-    } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
-        let a = if input[1] < b':' {
-            input[1] - 48
-        } else if input[1] < b'[' {
-            input[1] - 55
-        } else {
-            input[1] - 87
-        };
-        let b = if input[2] < b':' {
-            input[2] - 48
-        } else if input[2] < b'[' {
-            input[2] - 55
-        } else {
-            input[2] - 87
-        };
-        IResult::Done(&input[3..], a * 16 + b)
-    } else if input.starts_with(b"\r\n") {
-        IResult::Done(&input[2..], b'\n')
-    } else {
-        IResult::Error(error_code!(ErrorKind::Custom(43)))
+//fn parser(input: I) -> IResult<I, O, E>;
+pub fn mail(input: &[u8]) -> Result<(Vec<(&[u8], &[u8])>, &[u8])> {
+    let (rest, result) = separated_pair(
+        headers::headers,
+        alt((tag(b"\n"), tag(b"\r\n"))),
+        take_while(|_| true),
+    )(input)
+    .chain_err_summary(|| "Could not parse mail")?;
+
+    if !rest.is_empty() {
+        return Err(MeliError::new("Got leftover bytes after parsing mail"));
     }
-}
 
-// Parser definition
+    Ok(result)
+}
 
-/* A header can span multiple lines, eg:
- *
- * Received: from -------------------- (-------------------------)
- * 	by --------------------- (--------------------- [------------------]) (-----------------------)
- * 	with ESMTP id ------------ for <------------------->;
- * 	Tue,  5 Jan 2016 21:30:44 +0100 (CET)
- */
+pub mod generic {
+    use super::*;
+    pub fn angle_bracket_delimeted_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+        separated_nonempty_list(is_a(","), delimited(tag("<"), take_until(">"), tag(">")))(
+            input.rtrim(),
+        )
+        //    separated_nonempty_list!(complete!(is_a!(",")), ws!(complete!(complete!(delimited!(tag!("<"), take_until1!(">"), tag!(">")))))));
+    }
 
-fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
-    let input_len = input.len();
-    for (i, x) in input.iter().enumerate() {
-        if *x == b'\n'
-            && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
-                || i + 1 == input_len)
-        {
-            return IResult::Done(&input[(i + 1)..], &input[0..i]);
-        } else if input[i..].starts_with(b"\r\n")
-            && (((i + 2) < input_len && input[i + 2] != b' ' && input[i + 2] != b'\t')
-                || i + 2 == input_len)
-        {
-            return IResult::Done(&input[(i + 2)..], &input[0..i]);
+    pub fn date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
+        let (_, mut parsed_result) = encodings::phrase(&eat_comments(input), false)?;
+        if let Some(pos) = parsed_result.find(b"-0000") {
+            parsed_result[pos] = b'+';
         }
+
+        crate::datetime::rfc822_to_timestamp(parsed_result.trim())
     }
-    IResult::Incomplete(Needed::Unknown)
-}
 
-/* Parse a single header as a tuple */
-fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
-    if input.is_empty() {
-        return IResult::Incomplete(Needed::Unknown);
-    } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
+    fn eat_comments(input: &[u8]) -> Vec<u8> {
+        let mut in_comment = false;
+        input
+            .iter()
+            .fold(Vec::with_capacity(input.len()), |mut acc, x| {
+                if *x == b'(' && !in_comment {
+                    in_comment = true;
+                    acc
+                } else if *x == b')' && in_comment {
+                    in_comment = false;
+                    acc
+                } else if in_comment {
+                    acc
+                } else {
+                    acc.push(*x);
+                    acc
+                }
+            })
     }
-    let mut ptr = 0;
-    let mut name: &[u8] = &input[0..0];
-    /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
-    for (i, x) in input.iter().enumerate() {
-        if *x == b':' {
-            name = &input[0..i];
-            ptr = i + 1;
-            break;
-        } else if is_ctl_or_space!(*x) {
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+    use crate::email::address::Address;
+    use crate::email::mailto::Mailto;
+    pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> {
+        if !input.starts_with(b"mailto:") {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-    }
-    if name.is_empty() {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
-    if ptr >= input.len() {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
 
-    if input[ptr] == b'\n' {
-        ptr += 1;
-        if ptr >= input.len() {
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+        input = &input[b"mailto:".len()..];
+
+        let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len());
+        let address: Address;
+
+        if let Ok((_, addr)) = crate::email::parser::address::address(&input[..end]) {
+            address = addr;
+            input = if input[end..].is_empty() {
+                &input[end..]
+            } else {
+                &input[end + 1..]
+            };
+        } else {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-    } else if input[ptr..].starts_with(b"\r\n") {
-        ptr += 2;
-        if ptr > input.len() {
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+
+        let mut subject = None;
+        let mut cc = None;
+        let mut bcc = None;
+        let mut body = None;
+        while !input.is_empty() {
+            let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') {
+                let ret = &input[0..tag_pos];
+                input = &input[tag_pos + 1..];
+                ret
+            } else {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            };
+
+            let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len());
+
+            let value = String::from_utf8_lossy(&input[..value_end]).to_string();
+            match tag {
+                b"subject" if subject.is_none() => {
+                    subject = Some(value);
+                }
+                b"cc" if cc.is_none() => {
+                    cc = Some(value);
+                }
+                b"bcc" if bcc.is_none() => {
+                    bcc = Some(value);
+                }
+                b"body" if body.is_none() => {
+                    /* FIXME:
+                     * Parse escaped characters properly.
+                     */
+                    body = Some(value.replace("%20", " ").replace("%0A", "\n"));
+                }
+                _ => {
+                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                }
+            }
+            if input[value_end..].is_empty() {
+                break;
+            }
+            input = &input[value_end + 1..];
         }
+        Ok((
+            input,
+            Mailto {
+                address,
+                subject,
+                cc,
+                bcc,
+                body,
+            },
+        ))
     }
-    if ptr >= input.len() {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
-    while input[ptr] == b' ' || input[ptr] == b'\t' {
-        ptr += 1;
-        if ptr >= input.len() {
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+
+    pub struct HeaderIterator<'a>(pub &'a [u8]);
+
+    impl<'a> Iterator for HeaderIterator<'a> {
+        type Item = (&'a [u8], &'a [u8]);
+        fn next(&mut self) -> Option<(&'a [u8], &'a [u8])> {
+            if self.0.is_empty() {
+                return None;
+            }
+
+            match super::headers::header(self.0) {
+                Ok((rest, value)) => {
+                    self.0 = rest;
+                    Some(value)
+                }
+                _ => {
+                    self.0 = &[];
+                    None
+                }
+            }
         }
     }
-    match header_value(&input[ptr..]) {
-        IResult::Done(rest, value) => IResult::Done(rest, (name, value)),
-        IResult::Incomplete(needed) => IResult::Incomplete(needed),
-        IResult::Error(code) => IResult::Error(code),
-    }
 }
 
-fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
-    if input.is_empty() {
-        return IResult::Incomplete(Needed::Unknown);
-    } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
+pub mod headers {
+    use super::*;
+
+    pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
+        many1(header)(input)
     }
-    let mut ptr = 0;
-    let mut name: &[u8] = &input[0..0];
-    let mut has_colon = false;
-    /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
-    for (i, x) in input.iter().enumerate() {
-        if input[i..].starts_with(b"\r\n") {
-            name = &input[0..i];
-            ptr = i + 2;
-            break;
-        } else if *x == b':' || *x == b'\n' {
-            name = &input[0..i];
+
+    pub fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+        alt((header_without_val, header_with_val))(input)
+    }
+
+    pub fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+        if input.is_empty() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        let mut ptr = 0;
+        let mut name: &[u8] = &input[0..0];
+        let mut has_colon = false;
+        /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
+        for (i, x) in input.iter().enumerate() {
+            if input[i..].starts_with(b"\r\n") {
+                name = &input[0..i];
+                ptr = i + 2;
+                break;
+            } else if *x == b':' || *x == b'\n' {
+                name = &input[0..i];
+                has_colon = true;
+                ptr = i;
+                break;
+            } else if is_ctl_or_space!(*x) {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+        }
+        if name.is_empty() || input.len() <= ptr {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        if input[ptr] == b':' {
+            ptr += 1;
             has_colon = true;
-            ptr = i;
-            break;
-        } else if is_ctl_or_space!(*x) {
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+            if ptr >= input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
         }
-    }
-    if name.is_empty() || input.len() <= ptr {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
-    if input[ptr] == b':' {
-        ptr += 1;
-        has_colon = true;
-        if ptr >= input.len() {
-            return IResult::Incomplete(Needed::Unknown);
+
+        if !has_colon {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-    }
 
-    if !has_colon {
-        return IResult::Incomplete(Needed::Unknown);
+        while input[ptr] == b' ' {
+            ptr += 1;
+            if ptr >= input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+        }
+        if input[ptr..].starts_with(b"\n") {
+            ptr += 1;
+            if ptr >= input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+            if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
+                Ok((&input[ptr..], (name, b"")))
+            } else {
+                Err(nom::Err::Error((input, ErrorKind::Tag)))
+            }
+        } else if input[ptr..].starts_with(b"\r\n") {
+            ptr += 2;
+            if ptr > input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+            if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
+                Ok((&input[ptr..], (name, b"")))
+            } else {
+                Err(nom::Err::Error((input, ErrorKind::Tag)))
+            }
+        } else {
+            Err(nom::Err::Error((input, ErrorKind::Tag)))
+        }
     }
 
-    while input[ptr] == b' ' {
-        ptr += 1;
-        if ptr >= input.len() {
-            return IResult::Incomplete(Needed::Unknown);
+    /* A header can span multiple lines, eg:
+     *
+     * Received: from -------------------- (-------------------------)
+     * 	by --------------------- (--------------------- [------------------]) (-----------------------)
+     * 	with ESMTP id ------------ for <------------------->;
+     * 	Tue,  5 Jan 2016 21:30:44 +0100 (CET)
+     */
+
+    pub fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
+        let input_len = input.len();
+        for (i, x) in input.iter().enumerate() {
+            if *x == b'\n'
+                && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
+                    || i + 1 == input_len)
+            {
+                return Ok((&input[(i + 1)..], &input[0..i]));
+            } else if input[i..].starts_with(b"\r\n")
+                && (((i + 2) < input_len && input[i + 2] != b' ' && input[i + 2] != b'\t')
+                    || i + 2 == input_len)
+            {
+                return Ok((&input[(i + 2)..], &input[0..i]));
+            }
         }
+        Err(nom::Err::Error((input, ErrorKind::Tag)))
     }
-    if input[ptr..].starts_with(b"\n") {
-        ptr += 1;
+
+    /* Parse a single header as a tuple */
+    pub fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+        if input.is_empty() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        let mut ptr = 0;
+        let mut name: &[u8] = &input[0..0];
+        /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
+        for (i, x) in input.iter().enumerate() {
+            if *x == b':' {
+                name = &input[0..i];
+                ptr = i + 1;
+                break;
+            } else if is_ctl_or_space!(*x) {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+        }
+        if name.is_empty() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
         if ptr >= input.len() {
-            return IResult::Incomplete(Needed::Unknown);
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-        if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
-            IResult::Done(&input[ptr..], (name, b""))
-        } else {
-            IResult::Error(error_code!(ErrorKind::Custom(43)))
+
+        if input[ptr] == b'\n' {
+            ptr += 1;
+            if ptr >= input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+        } else if input[ptr..].starts_with(b"\r\n") {
+            ptr += 2;
+            if ptr > input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
         }
-    } else if input[ptr..].starts_with(b"\r\n") {
-        ptr += 2;
-        if ptr > input.len() {
-            return IResult::Incomplete(Needed::Unknown);
+        if ptr >= input.len() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-        if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
-            IResult::Done(&input[ptr..], (name, b""))
-        } else {
-            IResult::Error(error_code!(ErrorKind::Custom(43)))
+        while input[ptr] == b' ' || input[ptr] == b'\t' {
+            ptr += 1;
+            if ptr >= input.len() {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
         }
-    } else {
-        IResult::Error(error_code!(ErrorKind::Custom(43)))
+        header_value(&input[ptr..]).map(|(rest, value)| (rest, (name, value)))
     }
-}
 
-named!(
-    header<(&[u8], &[u8])>,
-    alt_complete!(call!(header_without_val) | call!(header_with_val))
-);
-/* Parse all headers -> Vec<(&str, Vec<&str>)> */
-named!(pub headers<std::vec::Vec<(&[u8], &[u8])>>,
-       many1!(complete!(header)));
-
-pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
-    if input.is_empty() {
-        return IResult::Incomplete(Needed::Unknown);
-    }
-    for i in 0..input.len() {
-        if input[i..].starts_with(b"\n\n") {
-            return IResult::Done(&input[(i + 1)..], &input[0..=i]);
-        } else if input[i..].starts_with(b"\r\n\r\n") {
-            return IResult::Done(&input[(i + 2)..], &input[0..=i]);
+    pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
+        if input.is_empty() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        for i in 0..input.len() {
+            if input[i..].starts_with(b"\n\n") {
+                return Ok((&input[(i + 1)..], &input[0..=i]));
+            } else if input[i..].starts_with(b"\r\n\r\n") {
+                return Ok((&input[(i + 2)..], &input[0..=i]));
+            }
         }
+        Err(nom::Err::Error((input, ErrorKind::Tag)))
     }
-    IResult::Error(error_code!(ErrorKind::Custom(43)))
 }
 
-named!(pub body_raw<&[u8]>,
-       do_parse!(
-           alt_complete!(take_until1!("\n\n") | take_until1!("\r\n\r\n")) >>
-           body: take_while!(call!(|_| true)) >>
-           ( { body } )));
-
-named!(pub mail<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
-       separated_pair!(headers, alt_complete!(tag!(b"\n") | tag!(b"\r\n")), take_while!(call!(|_| true))));
+pub mod attachments {
+    use super::*;
+    use crate::email::address::*;
+    pub fn attachment(input: &[u8]) -> IResult<&[u8], (std::vec::Vec<(&[u8], &[u8])>, &[u8])> {
+        separated_pair(
+            many0(headers::header),
+            alt((tag(b"\n"), tag(b"\r\n"))),
+            take_while(|_| true),
+        )(input)
+    }
 
-named!(pub attachment<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
-       do_parse!(
-       pair: separated_pair!(many0!(complete!(header)), alt_complete!(tag!(b"\n") | tag!(b"\r\n")), take_while!(call!(|_| true))) >>
-       ( { pair } )));
+    pub fn multipart_parts<'a>(
+        input: &'a [u8],
+        boundary: &[u8],
+    ) -> IResult<&'a [u8], Vec<StrBuilder>> {
+        let mut ret: Vec<_> = Vec::new();
+        let mut input = input;
+        let mut offset = 0;
+        loop {
+            let b_start = if let Some(v) = input.find(boundary) {
+                v
+            } else {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            };
 
-/* Header parsers */
+            if b_start < 2 {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+            offset += b_start - 2;
+            input = &input[b_start - 2..];
+            if &input[0..2] == b"--" {
+                offset += 2 + boundary.len();
+                input = &input[2 + boundary.len()..];
+                if input[0] == b'\n' {
+                    offset += 1;
+                    input = &input[1..];
+                } else if input[0..].starts_with(b"\r\n") {
+                    offset += 2;
+                    input = &input[2..];
+                } else {
+                    continue;
+                }
+                break;
+            }
+        }
 
-/* Encoded words
- *"=?charset?encoding?encoded text?=".
- */
-fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
-    if input.is_empty() {
-        return IResult::Done(&[], Vec::with_capacity(0));
-    }
-    if input.len() < 5 {
-        return IResult::Incomplete(Needed::Unknown);
-    } else if input[0] != b'=' || input[1] != b'?' {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
-    /* find end of Charset tag:
-     * =?charset?encoding?encoded text?=
-     * ---------^
-     */
-    let mut tag_end_idx = None;
-    for (idx, b) in input[2..].iter().enumerate() {
-        if *b == b'?' {
-            tag_end_idx = Some(idx + 2);
-            break;
+        loop {
+            if input.len() < boundary.len() + 4 {
+                return Err(nom::Err::Error((input, ErrorKind::Tag)));
+            }
+            if let Some(end) = input.find(boundary) {
+                if &input[end - 2..end] != b"--" {
+                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                }
+                ret.push(StrBuilder {
+                    offset,
+                    length: end - 2,
+                });
+                offset += end + boundary.len();
+                input = &input[end + boundary.len()..];
+                if input.len() < 2 || input[0] != b'\n' || &input[0..2] == b"--" {
+                    break;
+                }
+                if input[0] == b'\n' {
+                    offset += 1;
+                    input = &input[1..];
+                } else if input[0..].starts_with(b"\r\n") {
+                    offset += 2;
+                    input = &input[2..];
+                }
+            } else {
+                ret.push(StrBuilder {
+                    offset,
+                    length: input.len(),
+                });
+                break;
+            }
         }
+        Ok((input, ret))
     }
-    if tag_end_idx.is_none() {
-        return IResult::Error(error_code!(ErrorKind::Custom(42)));
-    }
-    let tag_end_idx = tag_end_idx.unwrap();
 
-    if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
-        return IResult::Error(error_code!(ErrorKind::Custom(43)));
-    }
-    /* See if input ends with "?=" and get ending index
-     * =?charset?encoding?encoded text?=
-     * -------------------------------^
-     */
-    let mut encoded_end_idx = None;
-    for i in (3 + tag_end_idx)..input.len() {
-        if input[i] == b'?' && i + 1 < input.len() && input[i + 1] == b'=' {
-            encoded_end_idx = Some(i);
-            break;
+    fn parts_f(boundary: &[u8]) -> impl Fn(&[u8]) -> IResult<&[u8], Vec<&[u8]>> + '_ {
+        move |input: &[u8]| -> IResult<&[u8], Vec<&[u8]>> {
+            let mut ret: Vec<&[u8]> = Vec::new();
+            let mut input = input;
+            loop {
+                let b_start = if let Some(v) = input.find(boundary) {
+                    v
+                } else {
+                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                };
+
+                if b_start < 2 {
+                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                }
+                input = &input[b_start - 2..];
+                if &input[0..2] == b"--" {
+                    input = &input[2 + boundary.len()..];
+                    if input[0] == b'\n' {
+                        input = &input[1..];
+                    } else if input[0..].starts_with(b"\r\n") {
+                        input = &input[2..];
+                    } else {
+                        continue;
+                    }
+                    break;
+                }
+            }
+            loop {
+                if input.len() < boundary.len() + 4 {
+                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                }
+                if let Some(end) = input.find(boundary) {
+                    if &input[end - 2..end] != b"--" {
+                        return Err(nom::Err::Error((input, ErrorKind::Tag)));
+                    }
+                    ret.push(&input[0..end - 2]);
+                    input = &input[end + boundary.len()..];
+                    if input.len() < 2
+                        || (input[0] != b'\n' && &input[0..2] != b"\r\n")
+                        || &input[0..2] == b"--"
+                    {
+                        break;
+                    }
+                    if input[0] == b'\n' {
+                        input = &input[1..];
+                    } else if input[0..].starts_with(b"\r\n") {
+                        input = &input[2..];
+                    }
+                } else {
+                    ret.push(input);
+                    break;
+                }
+            }
+            Ok((input, ret))
         }
     }
-    if encoded_end_idx.is_none() {
-        return IResult::Error(error_code!(ErrorKind::Custom(44)));
-    }
-    let encoded_end_idx = encoded_end_idx.unwrap();
-    let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];
 
-    let s: Vec<u8> = match input[tag_end_idx + 1] {
-        b'b' | b'B' => match BASE64_MIME.decode(encoded_text) {
-            Ok(v) => v,
-            Err(_) => encoded_text.to_vec(),
-        },
-        b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
-            IResult::Done(b"", s) => s,
-            _ => return IResult::Error(error_code!(ErrorKind::Custom(45))),
-        },
-        _ => return IResult::Error(error_code!(ErrorKind::Custom(46))),
-    };
+    pub fn parts<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
+        alt((
+            parts_f(boundary),
+            |input: &'a [u8]| -> IResult<&'a [u8], Vec<&'a [u8]>> {
+                let (input, _) = take_until(&b"--"[..])(input)?;
+                let (input, _) = take_until(boundary)(input)?;
+                Ok((input, Vec::<&[u8]>::new()))
+            },
+        ))(input)
+        /*
+            alt_complete!(call!(parts_f, boundary) | do_parse!(
+                        take_until_and_consume!(&b"--"[..]) >>
+                        take_until_and_consume!(boundary) >>
+                        ( { Vec::<&[u8]>::new() } ))
+                    ));
+        */
+    }
 
-    let charset = Charset::from(&input[2..tag_end_idx]);
+    /* Caution: values should be passed through phrase() */
+    pub fn content_type_parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+        let (input, _) = tag(";")(input)?;
+        let (input, name) = terminated(take_until("="), tag("="))(input.ltrim())?;
+        let (input, value) = alt((
+            delimited(tag("\""), take_until("\""), tag("\"")),
+            is_not(";"),
+        ))(input.ltrim())?;
 
-    if let Charset::UTF8 = charset {
-        IResult::Done(&input[encoded_end_idx + 2..], s)
-    } else {
-        match decode_charset(&s, charset) {
-            Ok(v) => IResult::Done(&input[encoded_end_idx + 2..], v.into_bytes()),
-            _ => IResult::Error(error_code!(ErrorKind::Custom(43))),
-        }
+        Ok((input, (name, value)))
     }
-}
 
-pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
-    match charset {
-        Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
-        Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
-        Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
-        Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
-        Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
-        Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
-        Charset::Windows1250 => Ok(WINDOWS_1250.decode(s, DecoderTrap::Strict)?),
-        Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
-        Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
-        Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
-        // Unimplemented:
-        Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
-        Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
-        Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
-        Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
+    pub fn content_type(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], Vec<(&[u8], &[u8])>)> {
+        let (input, _type) = take_until("/")(input)?;
+        let (input, _) = tag("/")(input)?;
+        let (input, _subtype) = is_not(";")(input)?;
+        let (input, parameters) = many0(content_type_parameter)(input)?;
+        Ok((input, (_type, _subtype, parameters)))
+        /*
+           do_parse!(
+               _type: take_until!("/") >>
+               tag!("/") >>
+               _subtype: is_not!(";") >>
+               parameters: many0!(complete!(content_type_parameter)) >>
+               ( {
+                   (_type, _subtype, parameters)
+               } )
+               ));
+        */
     }
 }
 
-fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], &[u8]> {
-    if input.len() < 2 {
-        IResult::Incomplete(Needed::Size(1))
-    } else if input[0] == b'=' && input[1] == b'\n' {
-        IResult::Done(&input[2..], &input[0..2]) // `=\n` is an escaped space character.
-    } else if input.len() > 3 && input.starts_with(b"=\r\n") {
-        IResult::Done(&input[3..], &input[0..3]) // `=\r\n` is an escaped space character.
-    } else {
-        IResult::Error(error_code!(ErrorKind::Custom(43)))
+pub mod encodings {
+    use super::*;
+    use crate::email::attachment_types::Charset;
+    use data_encoding::BASE64_MIME;
+    use encoding::all::*;
+    use encoding::{DecoderTrap, Encoding};
+    /// Decodes a single byte from the start of quoted-printable `input`.
+    ///
+    /// Two forms are recognised:
+    /// * `=XY`, where `X` and `Y` are hexadecimal digits (either case): consumes three
+    ///   bytes and yields the byte with value `0xXY`.
+    /// * `\r\n`: consumes two bytes and yields a single `\n`.
+    ///
+    /// Anything else -- including any input shorter than three bytes -- is rejected with
+    /// a recoverable `nom::Err::Error` of kind `ErrorKind::Tag`.
+    ///
+    /// NOTE(review): because of the length check, a `\r\n` that makes up the final two
+    /// bytes of the input is rejected here rather than collapsed to `\n` -- confirm
+    /// this is intended.
+    pub fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
+        if input.len() < 3 {
+            Err(nom::Err::Error((input, ErrorKind::Tag)))
+        } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
+            // Both bytes are known to be hex digits here, so the comparisons below only
+            // have to tell '0'..='9', 'A'..='F' and 'a'..='f' apart.
+            // High nibble.
+            let a = if input[1] < b':' {
+                // '0'..='9': 48 == b'0'
+                input[1] - 48
+            } else if input[1] < b'[' {
+                // 'A'..='F': 55 == b'A' - 10
+                input[1] - 55
+            } else {
+                // 'a'..='f': 87 == b'a' - 10
+                input[1] - 87
+            };
+            // Low nibble, decoded the same way.
+            let b = if input[2] < b':' {
+                input[2] - 48
+            } else if input[2] < b'[' {
+                input[2] - 55
+            } else {
+                input[2] - 87
+            };
+            Ok((&input[3..], a * 16 + b))
+        } else if input.starts_with(b"\r\n") {
+            // A CRLF pair is emitted as a bare LF.
+            Ok((&input[2..], b'\n'))
+        } else {
+            Err(nom::Err::Error((input, ErrorKind::Tag)))
+        }
     }
-}
 
-named!(
-    qp_underscore_header<u8>,
-    do_parse!(tag!(b"_") >> ({ 0x20 }))
-);
-
-// With MIME, headers in quoted printable format can contain underscores that represent spaces.
-// In non-header context, an underscore is just a plain underscore.
-named!(
-    pub quoted_printable_bytes_header<Vec<u8>>,
-    many0!(alt_complete!(
-        quoted_printable_byte | qp_underscore_header | le_u8
-    ))
-);
-
-// For atoms in Header values.
-named!(
-    pub quoted_printable_bytes<Vec<u8>>,
-    many0!(alt_complete!(
-        preceded!(quoted_printable_soft_break, quoted_printable_byte) |
-        preceded!(quoted_printable_soft_break, le_u8) | quoted_printable_byte | le_u8
-    ))
-);
-
-fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
-    if input.is_empty() || input.len() < 3 {
-        IResult::Incomplete(Needed::Size(1))
-    } else if !is_whitespace!(input[0]) {
-        let mut display_name = StrBuilder {
-            offset: 0,
-            length: 0,
-        };
-        let mut flag = false;
-        for (i, b) in input[0..].iter().enumerate() {
-            if *b == b'<' {
-                display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
-                flag = true;
+    /* Encoded words (RFC 2047) have the form
+     * "=?charset?encoding?encoded text?=".
+     */
+    fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
+        if input.is_empty() {
+            return Ok((&[], Vec::with_capacity(0)));
+        }
+        if input.len() < 5 {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        } else if input[0] != b'=' || input[1] != b'?' {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        /* find end of Charset tag:
+         * =?charset?encoding?encoded text?=
+         * ---------^
+         */
+        let mut tag_end_idx = None;
+        for (idx, b) in input[2..].iter().enumerate() {
+            if *b == b'?' {
+                tag_end_idx = Some(idx + 2);
                 break;
             }
         }
-        if !flag {
-            let (rest, output) = match phrase(input, false) {
-                IResult::Done(rest, raw) => (rest, raw),
-                _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
-            };
-            if output.contains(&b'<') {
-                match display_addr(&output) {
-                    IResult::Done(_, address) => return IResult::Done(rest, address),
-                    _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
-                }
-            }
-            return IResult::Error(error_code!(ErrorKind::Custom(43)));
+        if tag_end_idx.is_none() {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
         }
-        let mut end = input.len();
-        let mut at_flag = false;
-        let mut flag = false;
-        for (i, b) in input[display_name.length + 2..].iter().enumerate() {
-            match *b {
-                b'@' => at_flag = true,
-                b'>' => {
-                    end = i;
-                    flag = true;
-                    break;
-                }
-                _ => {}
+        let tag_end_idx = tag_end_idx.unwrap();
+
+        if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
+            return Err(nom::Err::Error((input, ErrorKind::Tag)));
+        }
+        /* See if input ends with "?=" and get ending index
+         * =?charset?encoding?encoded text?=
+         * -------------------------------^
+         */
+        let mut encoded_end_idx = None;
+        for i in (3 + tag
author	Manos Pitsidianakis <el13635@mail.ntua.gr>	2020-06-06 19:38:20 +0300
committer	Manos Pitsidianakis <el13635@mail.ntua.gr>	2020-06-06 23:19:07 +0300
commit	6ec249dd7f5c4dbaafa151f7a3be682e8fe0c2a9 (patch)
tree	dbe8084881a0b6dc11c52642812fc14d12d53bda /melib/src/email/parser.rs
parent	db4c40182880582761937a0b8e0dfdc915628b21 (diff)