summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author ufoscout <ufoscout@gmail.com> 2019-05-28 13:16:13 +0200
committer Kartikaya Gupta <kats@trevize.staktrace.com> 2019-06-04 16:36:21 -0400
commit a4a0e917580cf465ffd1aa8ae6723860aab23c6f (patch)
tree a80b872fe63634c20d1507723e4d720dab973370 /src
parent b47729baba3af4994a46e101c429e79023b347b2 (diff)
Allow access to encoded body
Diffstat (limited to 'src')
-rw-r--r-- src/body.rs 152
-rw-r--r-- src/lib.rs 205
2 files changed, 331 insertions, 26 deletions
diff --git a/src/body.rs b/src/body.rs
new file mode 100644
index 0000000..ed1b617
--- /dev/null
+++ b/src/body.rs
@@ -0,0 +1,152 @@
+use charset::{decode_ascii, Charset};
+use {MailParseError, ParsedContentType};
+
+/// Represents the body of an email (or mail subpart)
+pub enum Body<'a> {
+ /// A body with 'base64' Content-Transfer-Encoding.
+ Base64(EncodedBody<'a>),
+ /// A body with 'quoted-printable' Content-Transfer-Encoding.
+ QuotedPrintable(EncodedBody<'a>),
+ /// A body with '7bit' Content-Transfer-Encoding.
+ SevenBit(TextBody<'a>),
+ /// A body with '8bit' Content-Transfer-Encoding.
+ EightBit(TextBody<'a>),
+ /// A body with 'binary' Content-Transfer-Encoding.
+ Binary(BinaryBody<'a>),
+}
+
+impl<'a> Body<'a> {
+ pub fn new(
+ body: &'a [u8],
+ ctype: &'a ParsedContentType,
+ transfer_encoding: &Option<String>,
+ ) -> Body<'a> {
+ transfer_encoding
+ .as_ref()
+ .map(|encoding| match encoding.as_ref() {
+ "base64" => Body::Base64(EncodedBody {
+ decoder: decode_base64,
+ body,
+ ctype,
+ }),
+ "quoted-printable" => Body::QuotedPrintable(EncodedBody {
+ decoder: decode_quoted_printable,
+ body,
+ ctype,
+ }),
+ "7bit" => Body::SevenBit(TextBody { body, ctype }),
+ "8bit" => Body::EightBit(TextBody { body, ctype }),
+ "binary" => Body::Binary(BinaryBody { body, ctype }),
+ _ => Body::get_default(body, ctype),
+ })
+ .unwrap_or_else(|| Body::get_default(body, ctype))
+ }
+
+ fn get_default(body: &'a [u8], ctype: &'a ParsedContentType) -> Body<'a> {
+ Body::SevenBit(TextBody { body, ctype })
+ }
+}
+
+/// Struct that holds the encoded body representation of the message (or message subpart).
+pub struct EncodedBody<'a> {
+ decoder: fn(&[u8]) -> Result<Vec<u8>, MailParseError>,
+ ctype: &'a ParsedContentType,
+ body: &'a [u8],
+}
+
+impl<'a> EncodedBody<'a> {
+ /// Get the body Content-Type
+ pub fn get_content_type(&self) -> &'a ParsedContentType {
+ self.ctype
+ }
+
+ /// Get the raw body of the message exactly as it is written in the message (or message subpart).
+ pub fn get_raw(&self) -> &'a [u8] {
+ self.body
+ }
+
+ /// Get the decoded body of the message (or message subpart).
+ pub fn get_decoded(&self) -> Result<Vec<u8>, MailParseError> {
+ (self.decoder)(self.body)
+ }
+
+ /// Get the body of the message as a Rust string.
+ /// This function tries to decode the body and then converts
+ /// the result into a Rust UTF-8 string using the charset in the Content-Type
+ /// (or "us-ascii" if the charset was missing or not recognized).
+ /// This operation returns a valid result only if the decoded body
+ /// has a text format.
+ pub fn get_decoded_as_string(&self) -> Result<String, MailParseError> {
+ get_body_as_string(&self.get_decoded()?, &self.ctype)
+ }
+}
+
+/// Struct that holds the textual body representation of the message (or message subpart).
+pub struct TextBody<'a> {
+ ctype: &'a ParsedContentType,
+ body: &'a [u8],
+}
+
+impl<'a> TextBody<'a> {
+ /// Get the body Content-Type
+ pub fn get_content_type(&self) -> &'a ParsedContentType {
+ self.ctype
+ }
+
+ /// Get the raw body of the message exactly as it is written in the message (or message subpart).
+ pub fn get_raw(&self) -> &'a [u8] {
+ self.body
+ }
+
+ /// Get the body of the message as a Rust string.
+ /// This function converts the body into a Rust UTF-8 string using the charset
+ /// in the Content-Type
+ /// (or "us-ascii" if the charset was missing or not recognized).
+ pub fn get_as_string(&self) -> Result<String, MailParseError> {
+ get_body_as_string(self.body, &self.ctype)
+ }
+}
+
+/// Struct that holds a binary body representation of the message (or message subpart).
+pub struct BinaryBody<'a> {
+ ctype: &'a ParsedContentType,
+ body: &'a [u8],
+}
+
+impl<'a> BinaryBody<'a> {
+ /// Get the body Content-Type
+ pub fn get_content_type(&self) -> &'a ParsedContentType {
+ self.ctype
+ }
+
+ /// Get the raw body of the message exactly as it is written in the message (or message subpart).
+ pub fn get_raw(&self) -> &'a [u8] {
+ self.body
+ }
+}
+
+fn decode_base64(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+ let cleaned = body
+ .iter()
+ .filter(|c| !c.is_ascii_whitespace())
+ .cloned()
+ .collect::<Vec<u8>>();
+ Ok(base64::decode(&cleaned)?)
+}
+
+fn decode_quoted_printable(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+ Ok(quoted_printable::decode(
+ body,
+ quoted_printable::ParseMode::Robust,
+ )?)
+}
+
+fn get_body_as_string(body: &[u8], ctype: &ParsedContentType) -> Result<String, MailParseError> {
+ let cow = if let Some(charset) = Charset::for_label(ctype.charset.as_bytes()) {
+ let (cow, _, _) = charset.decode(body);
+ cow
+ } else {
+ decode_ascii(body)
+ };
+ Ok(cow.into_owned())
+}
diff --git a/src/lib.rs b/src/lib.rs
index c727c58..7af26bc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,12 +7,13 @@ use std::error;
use std::fmt;
use std::ops::Deref;
-use charset::decode_ascii;
use charset::decode_latin1;
use charset::Charset;
+pub mod body;
mod dateparse;
+use body::Body;
pub use dateparse::dateparse;
/// An error type that represents the different kinds of errors that may be
@@ -650,14 +651,13 @@ impl<'a> ParsedMail<'a> {
/// assert_eq!(p.get_body().unwrap(), "This is the body");
/// ```
pub fn get_body(&self) -> Result<String, MailParseError> {
- let decoded = self.get_body_raw()?;
- let cow = if let Some(charset) = Charset::for_label(self.ctype.charset.as_bytes()) {
- let (cow, _, _) = charset.decode(&decoded);
- cow
- } else {
- decode_ascii(&decoded)
- };
- Ok(cow.into_owned())
+ match self.get_body_encoded()? {
+ Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded_as_string(),
+ Body::SevenBit(body) | Body::EightBit(body) => body.get_as_string(),
+ Body::Binary(_) => Err(MailParseError::Generic(
+ "Message body of type binary body cannot be parsed into a string",
+ )),
+ }
}
/// Get the body of the message as a Rust Vec<u8>. This function tries to
@@ -675,27 +675,59 @@ impl<'a> ParsedMail<'a> {
/// assert_eq!(p.get_body_raw().unwrap(), b"This is the body");
/// ```
pub fn get_body_raw(&self) -> Result<Vec<u8>, MailParseError> {
- let transfer_coding = self
+ match self.get_body_encoded()? {
+ Body::Base64(body) | Body::QuotedPrintable(body) => body.get_decoded(),
+ Body::SevenBit(body) | Body::EightBit(body) => Ok(Vec::<u8>::from(body.get_raw())),
+ Body::Binary(body) => Ok(Vec::<u8>::from(body.get_raw())),
+ }
+ }
+
+ /// Get the body of the message.
+ /// This function returns the original body without attempting to
+ /// unapply the Content-Transfer-Encoding.
+ ///
+ /// # Examples
+ /// ```
+ /// use mailparse::parse_mail;
+ /// use mailparse::body::Body;
+ ///
+ /// let mail = parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
+ ///
+ /// match mail.get_body_encoded().unwrap() {
+ /// Body::Base64(body) => {
+ /// assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
+ /// assert_eq!(body.get_decoded().unwrap(), b"hello world");
+ /// assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
+ /// },
+ /// _ => assert!(false),
+ /// };
+ ///
+ ///
+ /// // An email whose body encoding is not known upfront
+ /// let another_mail = parse_mail(b"").unwrap();
+ ///
+ /// match another_mail.get_body_encoded().unwrap() {
+ /// Body::Base64(body) | Body::QuotedPrintable(body) => {
+ /// println!("mail body encoded: {:?}", body.get_raw());
+ /// println!("mail body decoded: {:?}", body.get_decoded().unwrap());
+ /// println!("mail body decoded as string: {}", body.get_decoded_as_string().unwrap());
+ /// },
+ /// Body::SevenBit(body) | Body::EightBit(body) => {
+ /// println!("mail body: {:?}", body.get_raw());
+ /// println!("mail body as string: {}", body.get_as_string().unwrap());
+ /// },
+ /// Body::Binary(body) => {
+ /// println!("mail body binary: {:?}", body.get_raw());
+ /// }
+ /// }
+ /// ```
+ pub fn get_body_encoded(&'a self) -> Result<Body<'a>, MailParseError> {
+ let transfer_encoding = self
.headers
.get_first_value("Content-Transfer-Encoding")?
.map(|s| s.to_lowercase());
- let decoded = match transfer_coding {
- Some(ref enc) if enc == "base64" => {
- let cleaned = self
- .body
- .iter()
- .filter(|c| !c.is_ascii_whitespace())
- .cloned()
- .collect::<Vec<u8>>();
- base64::decode(&cleaned)?
- }
- Some(ref enc) if enc == "quoted-printable" => {
- quoted_printable::decode(self.body, quoted_printable::ParseMode::Robust)?
- }
- _ => Vec::<u8>::from(self.body),
- };
- Ok(decoded)
+ Ok(Body::new(self.body, &self.ctype, &transfer_encoding))
}
/// Returns a struct containing a parsed representation of the
@@ -1265,4 +1297,125 @@ mod tests {
let parsed = parse_param_content(r#"Content-Type: application/octet-stream; name=""#);
assert_eq!(parsed.params["name"], "\"");
}
+
+ #[test]
+ fn test_default_content_encoding() {
+ let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\n\r\n+JgM-").unwrap();
+ let body = mail.get_body_encoded().unwrap();
+ match body {
+ Body::SevenBit(body) => {
+ assert_eq!(body.get_raw(), b"+JgM-");
+ assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_7bit_content_encoding() {
+ let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 7bit\r\n\r\n+JgM-").unwrap();
+ let body = mail.get_body_encoded().unwrap();
+ match body {
+ Body::SevenBit(body) => {
+ assert_eq!(body.get_raw(), b"+JgM-");
+ assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_8bit_content_encoding() {
+ let mail = parse_mail(b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: 8bit\r\n\r\n+JgM-").unwrap();
+ let body = mail.get_body_encoded().unwrap();
+ match body {
+ Body::EightBit(body) => {
+ assert_eq!(body.get_raw(), b"+JgM-");
+ assert_eq!(body.get_as_string().unwrap(), "\u{2603}");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_quoted_printable_content_encoding() {
+ let mail = parse_mail(
+ b"Content-Type: text/plain; charset=UTF-7\r\nContent-Transfer-Encoding: quoted-printable\r\n\r\n+JgM-",
+ ).unwrap();
+ match mail.get_body_encoded().unwrap() {
+ Body::QuotedPrintable(body) => {
+ assert_eq!(body.get_raw(), b"+JgM-");
+ assert_eq!(body.get_decoded().unwrap(), b"+JgM-");
+ assert_eq!(body.get_decoded_as_string().unwrap(), "\u{2603}");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_base64_content_encoding() {
+ let mail =
+ parse_mail(b"Content-Transfer-Encoding: base64\r\n\r\naGVsbG 8gd\r\n29ybGQ=").unwrap();
+ match mail.get_body_encoded().unwrap() {
+ Body::Base64(body) => {
+ assert_eq!(body.get_raw(), b"aGVsbG 8gd\r\n29ybGQ=");
+ assert_eq!(body.get_decoded().unwrap(), b"hello world");
+ assert_eq!(body.get_decoded_as_string().unwrap(), "hello world");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_binary_content_encoding() {
+ let mail = parse_mail(b"Content-Transfer-Encoding: binary\r\n\r\n######").unwrap();
+ let body = mail.get_body_encoded().unwrap();
+ match body {
+ Body::Binary(body) => {
+ assert_eq!(body.get_raw(), b"######");
+ }
+ _ => assert!(false),
+ };
+ }
+
+ #[test]
+ fn test_body_content_encoding_with_multipart() {
+ let mail_filepath = "./tests/files/test_email_01.txt";
+ let mail = std::fs::read(mail_filepath)
+ .expect(&format!("Unable to open the file [{}]", mail_filepath));
+ let mail = parse_mail(&mail).unwrap();
+
+ let subpart_0 = mail.subparts.get(0).unwrap();
+ match subpart_0.get_body_encoded().unwrap() {
+ Body::SevenBit(body) => {
+ assert_eq!(
+ body.get_as_string().unwrap().trim(),
+ "<html>Test with attachments</html>"
+ );
+ }
+ _ => assert!(false),
+ };
+
+ let subpart_1 = mail.subparts.get(1).unwrap();
+ match subpart_1.get_body_encoded().unwrap() {
+ Body::Base64(body) => {
+ let pdf_filepath = "./tests/files/test_email_01_sample.pdf";
+ let original_pdf = std::fs::read(pdf_filepath)
+ .expect(&format!("Unable to open the file [{}]", pdf_filepath));
+ assert_eq!(body.get_decoded().unwrap(), original_pdf);
+ }
+ _ => assert!(false),
+ };
+
+ let subpart_2 = mail.subparts.get(2).unwrap();
+ match subpart_2.get_body_encoded().unwrap() {
+ Body::Base64(body) => {
+ assert_eq!(
+ body.get_decoded_as_string().unwrap(),
+ "txt file context for email collector\n1234567890987654321\n"
+ );
+ }
+ _ => assert!(false),
+ };
+ }
}