summary | refs | log | tree | commit | diff | stats
path: root/melib/src/email/parser.rs
diff options
context:
space:
mode:
author: Manos Pitsidianakis <el13635@mail.ntua.gr> 2020-06-06 19:38:20 +0300
committer: Manos Pitsidianakis <el13635@mail.ntua.gr> 2020-06-06 23:19:07 +0300
commit: 6ec249dd7f5c4dbaafa151f7a3be682e8fe0c2a9 (patch)
tree: dbe8084881a0b6dc11c52642812fc14d12d53bda /melib/src/email/parser.rs
parent: db4c40182880582761937a0b8e0dfdc915628b21 (diff)
melib: update nom dependency from 3.2.0 to 5.1.1
That was hecking exhausting
Diffstat (limited to 'melib/src/email/parser.rs')
-rw-r--r-- melib/src/email/parser.rs 1780
1 file changed, 925 insertions, 855 deletions
diff --git a/melib/src/email/parser.rs b/melib/src/email/parser.rs
index b0380cfb..e6520f00 100644
--- a/melib/src/email/parser.rs
+++ b/melib/src/email/parser.rs
@@ -1,7 +1,7 @@
/*
* meli - parser module
*
- * Copyright 2017 Manos Pitsidianakis
+ * Copyright 2017 - 2020 Manos Pitsidianakis
*
* This file is part of meli.
*
@@ -18,14 +18,19 @@
* You should have received a copy of the GNU General Public License
* along with meli. If not, see <http://www.gnu.org/licenses/>.
*/
-use super::*;
-use data_encoding::BASE64_MIME;
-use encoding::{DecoderTrap, Encoding};
-use nom::{is_hex_digit, le_u8};
-pub(super) use nom::{ErrorKind, IResult, Needed};
-use encoding::all::*;
-use std;
+use crate::error::{MeliError, Result, ResultIntoMeliError};
+use nom::{
+ branch::alt,
+ bytes::complete::{is_a, is_not, tag, take_until, take_while},
+ character::is_hex_digit,
+ combinator::peek,
+ error::ErrorKind,
+ multi::{many0, many1, separated_list, separated_nonempty_list},
+ number::complete::le_u8,
+ sequence::{delimited, preceded, separated_pair, terminated},
+ IResult,
+};
macro_rules! is_ctl_or_space {
($var:ident) => {
@@ -121,969 +126,1038 @@ impl<'a, P: for<'r> FnMut(&'r u8) -> bool> BytesIterExt for std::slice::Split<'a
}
}
-fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
- if input.len() < 3 {
- IResult::Incomplete(Needed::Size(1))
- } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
- let a = if input[1] < b':' {
- input[1] - 48
- } else if input[1] < b'[' {
- input[1] - 55
- } else {
- input[1] - 87
- };
- let b = if input[2] < b':' {
- input[2] - 48
- } else if input[2] < b'[' {
- input[2] - 55
- } else {
- input[2] - 87
- };
- IResult::Done(&input[3..], a * 16 + b)
- } else if input.starts_with(b"\r\n") {
- IResult::Done(&input[2..], b'\n')
- } else {
- IResult::Error(error_code!(ErrorKind::Custom(43)))
+//fn parser(input: I) -> IResult<I, O, E>;
+pub fn mail(input: &[u8]) -> Result<(Vec<(&[u8], &[u8])>, &[u8])> {
+ let (rest, result) = separated_pair(
+ headers::headers,
+ alt((tag(b"\n"), tag(b"\r\n"))),
+ take_while(|_| true),
+ )(input)
+ .chain_err_summary(|| "Could not parse mail")?;
+
+ if !rest.is_empty() {
+ return Err(MeliError::new("Got leftover bytes after parsing mail"));
}
-}
-// Parser definition
+ Ok(result)
+}
-/* A header can span multiple lines, eg:
- *
- * Received: from -------------------- (-------------------------)
- * by --------------------- (--------------------- [------------------]) (-----------------------)
- * with ESMTP id ------------ for <------------------->;
- * Tue, 5 Jan 2016 21:30:44 +0100 (CET)
- */
+pub mod generic {
+ use super::*;
+ pub fn angle_bracket_delimeted_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
+ separated_nonempty_list(is_a(","), delimited(tag("<"), take_until(">"), tag(">")))(
+ input.rtrim(),
+ )
+ // separated_nonempty_list!(complete!(is_a!(",")), ws!(complete!(complete!(delimited!(tag!("<"), take_until1!(">"), tag!(">")))))));
+ }
-fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
- let input_len = input.len();
- for (i, x) in input.iter().enumerate() {
- if *x == b'\n'
- && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
- || i + 1 == input_len)
- {
- return IResult::Done(&input[(i + 1)..], &input[0..i]);
- } else if input[i..].starts_with(b"\r\n")
- && (((i + 2) < input_len && input[i + 2] != b' ' && input[i + 2] != b'\t')
- || i + 2 == input_len)
- {
- return IResult::Done(&input[(i + 2)..], &input[0..i]);
+ pub fn date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
+ let (_, mut parsed_result) = encodings::phrase(&eat_comments(input), false)?;
+ if let Some(pos) = parsed_result.find(b"-0000") {
+ parsed_result[pos] = b'+';
}
+
+ crate::datetime::rfc822_to_timestamp(parsed_result.trim())
}
- IResult::Incomplete(Needed::Unknown)
-}
-/* Parse a single header as a tuple */
-fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
- if input.is_empty() {
- return IResult::Incomplete(Needed::Unknown);
- } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+ fn eat_comments(input: &[u8]) -> Vec<u8> {
+ let mut in_comment = false;
+ input
+ .iter()
+ .fold(Vec::with_capacity(input.len()), |mut acc, x| {
+ if *x == b'(' && !in_comment {
+ in_comment = true;
+ acc
+ } else if *x == b')' && in_comment {
+ in_comment = false;
+ acc
+ } else if in_comment {
+ acc
+ } else {
+ acc.push(*x);
+ acc
+ }
+ })
}
- let mut ptr = 0;
- let mut name: &[u8] = &input[0..0];
- /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
- for (i, x) in input.iter().enumerate() {
- if *x == b':' {
- name = &input[0..i];
- ptr = i + 1;
- break;
- } else if is_ctl_or_space!(*x) {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+ use crate::email::address::Address;
+ use crate::email::mailto::Mailto;
+ pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> {
+ if !input.starts_with(b"mailto:") {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- }
- if name.is_empty() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- if ptr >= input.len() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- if input[ptr] == b'\n' {
- ptr += 1;
- if ptr >= input.len() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+ input = &input[b"mailto:".len()..];
+
+ let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len());
+ let address: Address;
+
+ if let Ok((_, addr)) = crate::email::parser::address::address(&input[..end]) {
+ address = addr;
+ input = if input[end..].is_empty() {
+ &input[end..]
+ } else {
+ &input[end + 1..]
+ };
+ } else {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- } else if input[ptr..].starts_with(b"\r\n") {
- ptr += 2;
- if ptr > input.len() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+
+ let mut subject = None;
+ let mut cc = None;
+ let mut bcc = None;
+ let mut body = None;
+ while !input.is_empty() {
+ let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') {
+ let ret = &input[0..tag_pos];
+ input = &input[tag_pos + 1..];
+ ret
+ } else {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ };
+
+ let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len());
+
+ let value = String::from_utf8_lossy(&input[..value_end]).to_string();
+ match tag {
+ b"subject" if subject.is_none() => {
+ subject = Some(value);
+ }
+ b"cc" if cc.is_none() => {
+ cc = Some(value);
+ }
+ b"bcc" if bcc.is_none() => {
+ bcc = Some(value);
+ }
+ b"body" if body.is_none() => {
+ /* FIXME:
+ * Parse escaped characters properly.
+ */
+ body = Some(value.replace("%20", " ").replace("%0A", "\n"));
+ }
+ _ => {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ }
+ if input[value_end..].is_empty() {
+ break;
+ }
+ input = &input[value_end + 1..];
}
+ Ok((
+ input,
+ Mailto {
+ address,
+ subject,
+ cc,
+ bcc,
+ body,
+ },
+ ))
}
- if ptr >= input.len() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- while input[ptr] == b' ' || input[ptr] == b'\t' {
- ptr += 1;
- if ptr >= input.len() {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+
+ pub struct HeaderIterator<'a>(pub &'a [u8]);
+
+ impl<'a> Iterator for HeaderIterator<'a> {
+ type Item = (&'a [u8], &'a [u8]);
+ fn next(&mut self) -> Option<(&'a [u8], &'a [u8])> {
+ if self.0.is_empty() {
+ return None;
+ }
+
+ match super::headers::header(self.0) {
+ Ok((rest, value)) => {
+ self.0 = rest;
+ Some(value)
+ }
+ _ => {
+ self.0 = &[];
+ None
+ }
+ }
}
}
- match header_value(&input[ptr..]) {
- IResult::Done(rest, value) => IResult::Done(rest, (name, value)),
- IResult::Incomplete(needed) => IResult::Incomplete(needed),
- IResult::Error(code) => IResult::Error(code),
- }
}
-fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
- if input.is_empty() {
- return IResult::Incomplete(Needed::Unknown);
- } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+pub mod headers {
+ use super::*;
+
+ pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
+ many1(header)(input)
}
- let mut ptr = 0;
- let mut name: &[u8] = &input[0..0];
- let mut has_colon = false;
- /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
- for (i, x) in input.iter().enumerate() {
- if input[i..].starts_with(b"\r\n") {
- name = &input[0..i];
- ptr = i + 2;
- break;
- } else if *x == b':' || *x == b'\n' {
- name = &input[0..i];
+
+ pub fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+ alt((header_without_val, header_with_val))(input)
+ }
+
+ pub fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+ if input.is_empty() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ let mut ptr = 0;
+ let mut name: &[u8] = &input[0..0];
+ let mut has_colon = false;
+ /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
+ for (i, x) in input.iter().enumerate() {
+ if input[i..].starts_with(b"\r\n") {
+ name = &input[0..i];
+ ptr = i + 2;
+ break;
+ } else if *x == b':' || *x == b'\n' {
+ name = &input[0..i];
+ has_colon = true;
+ ptr = i;
+ break;
+ } else if is_ctl_or_space!(*x) {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ }
+ if name.is_empty() || input.len() <= ptr {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ if input[ptr] == b':' {
+ ptr += 1;
has_colon = true;
- ptr = i;
- break;
- } else if is_ctl_or_space!(*x) {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
}
- }
- if name.is_empty() || input.len() <= ptr {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- if input[ptr] == b':' {
- ptr += 1;
- has_colon = true;
- if ptr >= input.len() {
- return IResult::Incomplete(Needed::Unknown);
+
+ if !has_colon {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- }
- if !has_colon {
- return IResult::Incomplete(Needed::Unknown);
+ while input[ptr] == b' ' {
+ ptr += 1;
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ }
+ if input[ptr..].starts_with(b"\n") {
+ ptr += 1;
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
+ Ok((&input[ptr..], (name, b"")))
+ } else {
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
+ }
+ } else if input[ptr..].starts_with(b"\r\n") {
+ ptr += 2;
+ if ptr > input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
+ Ok((&input[ptr..], (name, b"")))
+ } else {
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
+ }
+ } else {
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
+ }
}
- while input[ptr] == b' ' {
- ptr += 1;
- if ptr >= input.len() {
- return IResult::Incomplete(Needed::Unknown);
+ /* A header can span multiple lines, eg:
+ *
+ * Received: from -------------------- (-------------------------)
+ * by --------------------- (--------------------- [------------------]) (-----------------------)
+ * with ESMTP id ------------ for <------------------->;
+ * Tue, 5 Jan 2016 21:30:44 +0100 (CET)
+ */
+
+ pub fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
+ let input_len = input.len();
+ for (i, x) in input.iter().enumerate() {
+ if *x == b'\n'
+ && (((i + 1) < input_len && input[i + 1] != b' ' && input[i + 1] != b'\t')
+ || i + 1 == input_len)
+ {
+ return Ok((&input[(i + 1)..], &input[0..i]));
+ } else if input[i..].starts_with(b"\r\n")
+ && (((i + 2) < input_len && input[i + 2] != b' ' && input[i + 2] != b'\t')
+ || i + 2 == input_len)
+ {
+ return Ok((&input[(i + 2)..], &input[0..i]));
+ }
}
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
}
- if input[ptr..].starts_with(b"\n") {
- ptr += 1;
+
+ /* Parse a single header as a tuple */
+ pub fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+ if input.is_empty() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ let mut ptr = 0;
+ let mut name: &[u8] = &input[0..0];
+ /* field-name = 1*<any CHAR, excluding CTLs, SPACE, and ":"> */
+ for (i, x) in input.iter().enumerate() {
+ if *x == b':' {
+ name = &input[0..i];
+ ptr = i + 1;
+ break;
+ } else if is_ctl_or_space!(*x) {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ }
+ if name.is_empty() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
if ptr >= input.len() {
- return IResult::Incomplete(Needed::Unknown);
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
- IResult::Done(&input[ptr..], (name, b""))
- } else {
- IResult::Error(error_code!(ErrorKind::Custom(43)))
+
+ if input[ptr] == b'\n' {
+ ptr += 1;
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ } else if input[ptr..].starts_with(b"\r\n") {
+ ptr += 2;
+ if ptr > input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
}
- } else if input[ptr..].starts_with(b"\r\n") {
- ptr += 2;
- if ptr > input.len() {
- return IResult::Incomplete(Needed::Unknown);
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
- IResult::Done(&input[ptr..], (name, b""))
- } else {
- IResult::Error(error_code!(ErrorKind::Custom(43)))
+ while input[ptr] == b' ' || input[ptr] == b'\t' {
+ ptr += 1;
+ if ptr >= input.len() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
}
- } else {
- IResult::Error(error_code!(ErrorKind::Custom(43)))
+ header_value(&input[ptr..]).map(|(rest, value)| (rest, (name, value)))
}
-}
-named!(
- header<(&[u8], &[u8])>,
- alt_complete!(call!(header_without_val) | call!(header_with_val))
-);
-/* Parse all headers -> Vec<(&str, Vec<&str>)> */
-named!(pub headers<std::vec::Vec<(&[u8], &[u8])>>,
- many1!(complete!(header)));
-
-pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
- if input.is_empty() {
- return IResult::Incomplete(Needed::Unknown);
- }
- for i in 0..input.len() {
- if input[i..].starts_with(b"\n\n") {
- return IResult::Done(&input[(i + 1)..], &input[0..=i]);
- } else if input[i..].starts_with(b"\r\n\r\n") {
- return IResult::Done(&input[(i + 2)..], &input[0..=i]);
+ pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
+ if input.is_empty() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ for i in 0..input.len() {
+ if input[i..].starts_with(b"\n\n") {
+ return Ok((&input[(i + 1)..], &input[0..=i]));
+ } else if input[i..].starts_with(b"\r\n\r\n") {
+ return Ok((&input[(i + 2)..], &input[0..=i]));
+ }
}
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
}
- IResult::Error(error_code!(ErrorKind::Custom(43)))
}
-named!(pub body_raw<&[u8]>,
- do_parse!(
- alt_complete!(take_until1!("\n\n") | take_until1!("\r\n\r\n")) >>
- body: take_while!(call!(|_| true)) >>
- ( { body } )));
-
-named!(pub mail<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
- separated_pair!(headers, alt_complete!(tag!(b"\n") | tag!(b"\r\n")), take_while!(call!(|_| true))));
+pub mod attachments {
+ use super::*;
+ use crate::email::address::*;
+ pub fn attachment(input: &[u8]) -> IResult<&[u8], (std::vec::Vec<(&[u8], &[u8])>, &[u8])> {
+ separated_pair(
+ many0(headers::header),
+ alt((tag(b"\n"), tag(b"\r\n"))),
+ take_while(|_| true),
+ )(input)
+ }
-named!(pub attachment<(std::vec::Vec<(&[u8], &[u8])>, &[u8])>,
- do_parse!(
- pair: separated_pair!(many0!(complete!(header)), alt_complete!(tag!(b"\n") | tag!(b"\r\n")), take_while!(call!(|_| true))) >>
- ( { pair } )));
+ pub fn multipart_parts<'a>(
+ input: &'a [u8],
+ boundary: &[u8],
+ ) -> IResult<&'a [u8], Vec<StrBuilder>> {
+ let mut ret: Vec<_> = Vec::new();
+ let mut input = input;
+ let mut offset = 0;
+ loop {
+ let b_start = if let Some(v) = input.find(boundary) {
+ v
+ } else {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ };
-/* Header parsers */
+ if b_start < 2 {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ offset += b_start - 2;
+ input = &input[b_start - 2..];
+ if &input[0..2] == b"--" {
+ offset += 2 + boundary.len();
+ input = &input[2 + boundary.len()..];
+ if input[0] == b'\n' {
+ offset += 1;
+ input = &input[1..];
+ } else if input[0..].starts_with(b"\r\n") {
+ offset += 2;
+ input = &input[2..];
+ } else {
+ continue;
+ }
+ break;
+ }
+ }
-/* Encoded words
- *"=?charset?encoding?encoded text?=".
- */
-fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
- if input.is_empty() {
- return IResult::Done(&[], Vec::with_capacity(0));
- }
- if input.len() < 5 {
- return IResult::Incomplete(Needed::Unknown);
- } else if input[0] != b'=' || input[1] != b'?' {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- /* find end of Charset tag:
- * =?charset?encoding?encoded text?=
- * ---------^
- */
- let mut tag_end_idx = None;
- for (idx, b) in input[2..].iter().enumerate() {
- if *b == b'?' {
- tag_end_idx = Some(idx + 2);
- break;
+ loop {
+ if input.len() < boundary.len() + 4 {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ if let Some(end) = input.find(boundary) {
+ if &input[end - 2..end] != b"--" {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ ret.push(StrBuilder {
+ offset,
+ length: end - 2,
+ });
+ offset += end + boundary.len();
+ input = &input[end + boundary.len()..];
+ if input.len() < 2 || input[0] != b'\n' || &input[0..2] == b"--" {
+ break;
+ }
+ if input[0] == b'\n' {
+ offset += 1;
+ input = &input[1..];
+ } else if input[0..].starts_with(b"\r\n") {
+ offset += 2;
+ input = &input[2..];
+ }
+ } else {
+ ret.push(StrBuilder {
+ offset,
+ length: input.len(),
+ });
+ break;
+ }
}
+ Ok((input, ret))
}
- if tag_end_idx.is_none() {
- return IResult::Error(error_code!(ErrorKind::Custom(42)));
- }
- let tag_end_idx = tag_end_idx.unwrap();
- if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
- }
- /* See if input ends with "?=" and get ending index
- * =?charset?encoding?encoded text?=
- * -------------------------------^
- */
- let mut encoded_end_idx = None;
- for i in (3 + tag_end_idx)..input.len() {
- if input[i] == b'?' && i + 1 < input.len() && input[i + 1] == b'=' {
- encoded_end_idx = Some(i);
- break;
+ fn parts_f(boundary: &[u8]) -> impl Fn(&[u8]) -> IResult<&[u8], Vec<&[u8]>> + '_ {
+ move |input: &[u8]| -> IResult<&[u8], Vec<&[u8]>> {
+ let mut ret: Vec<&[u8]> = Vec::new();
+ let mut input = input;
+ loop {
+ let b_start = if let Some(v) = input.find(boundary) {
+ v
+ } else {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ };
+
+ if b_start < 2 {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ input = &input[b_start - 2..];
+ if &input[0..2] == b"--" {
+ input = &input[2 + boundary.len()..];
+ if input[0] == b'\n' {
+ input = &input[1..];
+ } else if input[0..].starts_with(b"\r\n") {
+ input = &input[2..];
+ } else {
+ continue;
+ }
+ break;
+ }
+ }
+ loop {
+ if input.len() < boundary.len() + 4 {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ if let Some(end) = input.find(boundary) {
+ if &input[end - 2..end] != b"--" {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ ret.push(&input[0..end - 2]);
+ input = &input[end + boundary.len()..];
+ if input.len() < 2
+ || (input[0] != b'\n' && &input[0..2] != b"\r\n")
+ || &input[0..2] == b"--"
+ {
+ break;
+ }
+ if input[0] == b'\n' {
+ input = &input[1..];
+ } else if input[0..].starts_with(b"\r\n") {
+ input = &input[2..];
+ }
+ } else {
+ ret.push(input);
+ break;
+ }
+ }
+ Ok((input, ret))
}
}
- if encoded_end_idx.is_none() {
- return IResult::Error(error_code!(ErrorKind::Custom(44)));
- }
- let encoded_end_idx = encoded_end_idx.unwrap();
- let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];
- let s: Vec<u8> = match input[tag_end_idx + 1] {
- b'b' | b'B' => match BASE64_MIME.decode(encoded_text) {
- Ok(v) => v,
- Err(_) => encoded_text.to_vec(),
- },
- b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
- IResult::Done(b"", s) => s,
- _ => return IResult::Error(error_code!(ErrorKind::Custom(45))),
- },
- _ => return IResult::Error(error_code!(ErrorKind::Custom(46))),
- };
+ pub fn parts<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
+ alt((
+ parts_f(boundary),
+ |input: &'a [u8]| -> IResult<&'a [u8], Vec<&'a [u8]>> {
+ let (input, _) = take_until(&b"--"[..])(input)?;
+ let (input, _) = take_until(boundary)(input)?;
+ Ok((input, Vec::<&[u8]>::new()))
+ },
+ ))(input)
+ /*
+ alt_complete!(call!(parts_f, boundary) | do_parse!(
+ take_until_and_consume!(&b"--"[..]) >>
+ take_until_and_consume!(boundary) >>
+ ( { Vec::<&[u8]>::new() } ))
+ ));
+ */
+ }
- let charset = Charset::from(&input[2..tag_end_idx]);
+ /* Caution: values should be passed through phrase() */
+ pub fn content_type_parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
+ let (input, _) = tag(";")(input)?;
+ let (input, name) = terminated(take_until("="), tag("="))(input.ltrim())?;
+ let (input, value) = alt((
+ delimited(tag("\""), take_until("\""), tag("\"")),
+ is_not(";"),
+ ))(input.ltrim())?;
- if let Charset::UTF8 = charset {
- IResult::Done(&input[encoded_end_idx + 2..], s)
- } else {
- match decode_charset(&s, charset) {
- Ok(v) => IResult::Done(&input[encoded_end_idx + 2..], v.into_bytes()),
- _ => IResult::Error(error_code!(ErrorKind::Custom(43))),
- }
+ Ok((input, (name, value)))
}
-}
-pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
- match charset {
- Charset::UTF8 | Charset::Ascii => Ok(String::from_utf8_lossy(s).to_string()),
- Charset::ISO8859_1 => Ok(ISO_8859_1.decode(s, DecoderTrap::Strict)?),
- Charset::ISO8859_2 => Ok(ISO_8859_2.decode(s, DecoderTrap::Strict)?),
- Charset::ISO8859_7 => Ok(ISO_8859_7.decode(s, DecoderTrap::Strict)?),
- Charset::ISO8859_15 => Ok(ISO_8859_15.decode(s, DecoderTrap::Strict)?),
- Charset::GBK => Ok(GBK.decode(s, DecoderTrap::Strict)?),
- Charset::Windows1250 => Ok(WINDOWS_1250.decode(s, DecoderTrap::Strict)?),
- Charset::Windows1251 => Ok(WINDOWS_1251.decode(s, DecoderTrap::Strict)?),
- Charset::Windows1252 => Ok(WINDOWS_1252.decode(s, DecoderTrap::Strict)?),
- Charset::Windows1253 => Ok(WINDOWS_1253.decode(s, DecoderTrap::Strict)?),
- // Unimplemented:
- Charset::GB2312 => Ok(String::from_utf8_lossy(s).to_string()),
- Charset::UTF16 => Ok(String::from_utf8_lossy(s).to_string()),
- Charset::BIG5 => Ok(String::from_utf8_lossy(s).to_string()),
- Charset::ISO2022JP => Ok(String::from_utf8_lossy(s).to_string()),
+ pub fn content_type(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], Vec<(&[u8], &[u8])>)> {
+ let (input, _type) = take_until("/")(input)?;
+ let (input, _) = tag("/")(input)?;
+ let (input, _subtype) = is_not(";")(input)?;
+ let (input, parameters) = many0(content_type_parameter)(input)?;
+ Ok((input, (_type, _subtype, parameters)))
+ /*
+ do_parse!(
+ _type: take_until!("/") >>
+ tag!("/") >>
+ _subtype: is_not!(";") >>
+ parameters: many0!(complete!(content_type_parameter)) >>
+ ( {
+ (_type, _subtype, parameters)
+ } )
+ ));
+ */
}
}
-fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], &[u8]> {
- if input.len() < 2 {
- IResult::Incomplete(Needed::Size(1))
- } else if input[0] == b'=' && input[1] == b'\n' {
- IResult::Done(&input[2..], &input[0..2]) // `=\n` is an escaped space character.
- } else if input.len() > 3 && input.starts_with(b"=\r\n") {
- IResult::Done(&input[3..], &input[0..3]) // `=\r\n` is an escaped space character.
- } else {
- IResult::Error(error_code!(ErrorKind::Custom(43)))
+pub mod encodings {
+ use super::*;
+ use crate::email::attachment_types::Charset;
+ use data_encoding::BASE64_MIME;
+ use encoding::all::*;
+ use encoding::{DecoderTrap, Encoding};
+ pub fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
+ if input.len() < 3 {
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
+ } else if input[0] == b'=' && is_hex_digit(input[1]) && is_hex_digit(input[2]) {
+ let a = if input[1] < b':' {
+ input[1] - 48
+ } else if input[1] < b'[' {
+ input[1] - 55
+ } else {
+ input[1] - 87
+ };
+ let b = if input[2] < b':' {
+ input[2] - 48
+ } else if input[2] < b'[' {
+ input[2] - 55
+ } else {
+ input[2] - 87
+ };
+ Ok((&input[3..], a * 16 + b))
+ } else if input.starts_with(b"\r\n") {
+ Ok((&input[2..], b'\n'))
+ } else {
+ Err(nom::Err::Error((input, ErrorKind::Tag)))
+ }
}
-}
-named!(
- qp_underscore_header<u8>,
- do_parse!(tag!(b"_") >> ({ 0x20 }))
-);
-
-// With MIME, headers in quoted printable format can contain underscores that represent spaces.
-// In non-header context, an underscore is just a plain underscore.
-named!(
- pub quoted_printable_bytes_header<Vec<u8>>,
- many0!(alt_complete!(
- quoted_printable_byte | qp_underscore_header | le_u8
- ))
-);
-
-// For atoms in Header values.
-named!(
- pub quoted_printable_bytes<Vec<u8>>,
- many0!(alt_complete!(
- preceded!(quoted_printable_soft_break, quoted_printable_byte) |
- preceded!(quoted_printable_soft_break, le_u8) | quoted_printable_byte | le_u8
- ))
-);
-
-fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
- if input.is_empty() || input.len() < 3 {
- IResult::Incomplete(Needed::Size(1))
- } else if !is_whitespace!(input[0]) {
- let mut display_name = StrBuilder {
- offset: 0,
- length: 0,
- };
- let mut flag = false;
- for (i, b) in input[0..].iter().enumerate() {
- if *b == b'<' {
- display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
- flag = true;
+ /* Encoded words
+ *"=?charset?encoding?encoded text?=".
+ */
+ fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
+ if input.is_empty() {
+ return Ok((&[], Vec::with_capacity(0)));
+ }
+ if input.len() < 5 {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ } else if input[0] != b'=' || input[1] != b'?' {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ /* find end of Charset tag:
+ * =?charset?encoding?encoded text?=
+ * ---------^
+ */
+ let mut tag_end_idx = None;
+ for (idx, b) in input[2..].iter().enumerate() {
+ if *b == b'?' {
+ tag_end_idx = Some(idx + 2);
break;
}
}
- if !flag {
- let (rest, output) = match phrase(input, false) {
- IResult::Done(rest, raw) => (rest, raw),
- _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
- };
- if output.contains(&b'<') {
- match display_addr(&output) {
- IResult::Done(_, address) => return IResult::Done(rest, address),
- _ => return IResult::Error(error_code!(ErrorKind::Custom(43))),
- }
- }
- return IResult::Error(error_code!(ErrorKind::Custom(43)));
+ if tag_end_idx.is_none() {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
}
- let mut end = input.len();
- let mut at_flag = false;
- let mut flag = false;
- for (i, b) in input[display_name.length + 2..].iter().enumerate() {
- match *b {
- b'@' => at_flag = true,
- b'>' => {
- end = i;
- flag = true;
- break;
- }
- _ => {}
+ let tag_end_idx = tag_end_idx.unwrap();
+
+ if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
+ return Err(nom::Err::Error((input, ErrorKind::Tag)));
+ }
+ /* See if input ends with "?=" and get ending index
+ * =?charset?encoding?encoded text?=
+ * -------------------------------^
+ */
+ let mut encoded_end_idx = None;
+ for i in (3 + tag