summaryrefslogtreecommitdiffstats
path: root/src/body.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/body.rs')
-rw-r--r--src/body.rs152
1 files changed, 152 insertions, 0 deletions
diff --git a/src/body.rs b/src/body.rs
new file mode 100644
index 0000000..ed1b617
--- /dev/null
+++ b/src/body.rs
@@ -0,0 +1,152 @@
+use charset::{decode_ascii, Charset};
+use {MailParseError, ParsedContentType};
+
+/// The body of an email (or mail subpart), tagged by its Content-Transfer-Encoding.
+pub enum Body<'a> {
+ /// 'base64' Content-Transfer-Encoding; decoded on demand through `EncodedBody`.
+ Base64(EncodedBody<'a>),
+ /// 'quoted-printable' Content-Transfer-Encoding; decoded on demand through `EncodedBody`.
+ QuotedPrintable(EncodedBody<'a>),
+ /// '7bit' Content-Transfer-Encoding; also the fallback for a missing or unknown encoding.
+ SevenBit(TextBody<'a>),
+ /// '8bit' Content-Transfer-Encoding; exposed as text with no transfer decoding step.
+ EightBit(TextBody<'a>),
+ /// 'binary' Content-Transfer-Encoding; only the raw bytes and Content-Type are exposed.
+ Binary(BinaryBody<'a>),
+}
+
+impl<'a> Body<'a> {
+    /// Builds the `Body` variant that corresponds to `transfer_encoding`.
+    ///
+    /// `body` and `ctype` are borrowed, not copied. A missing or unrecognized
+    /// encoding falls back to the '7bit' default.
+    pub fn new(
+        body: &'a [u8],
+        ctype: &'a ParsedContentType,
+        transfer_encoding: &Option<String>,
+    ) -> Body<'a> {
+        // RFC 2045 makes encoding names case-insensitive, so normalize before matching.
+        let encoding = transfer_encoding.as_ref().map(|name| name.trim().to_ascii_lowercase());
+        match encoding.as_ref().map(String::as_str) {
+            Some("base64") => Body::Base64(EncodedBody { decoder: decode_base64, body, ctype }),
+            Some("quoted-printable") => Body::QuotedPrintable(EncodedBody {
+                decoder: decode_quoted_printable,
+                body,
+                ctype,
+            }),
+            Some("7bit") => Body::SevenBit(TextBody { body, ctype }),
+            Some("8bit") => Body::EightBit(TextBody { body, ctype }),
+            Some("binary") => Body::Binary(BinaryBody { body, ctype }),
+            _ => Body::get_default(body, ctype),
+        }
+    }
+
+    /// Fallback for a missing or unrecognized encoding: treat the body as '7bit'.
+    fn get_default(body: &'a [u8], ctype: &'a ParsedContentType) -> Body<'a> {
+        Body::SevenBit(TextBody { body, ctype })
+    }
+}
+
+/// Body of a message (or message subpart) stored in a transfer encoding that needs decoding.
+pub struct EncodedBody<'a> {
+    body: &'a [u8],
+    ctype: &'a ParsedContentType,
+    decoder: fn(&[u8]) -> Result<Vec<u8>, MailParseError>,
+}
+
+impl<'a> EncodedBody<'a> {
+    /// Returns the still-encoded bytes, exactly as they appear in the message (or subpart).
+    pub fn get_raw(&self) -> &'a [u8] {
+        self.body
+    }
+
+    /// Returns the Content-Type of this body.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        self.ctype
+    }
+
+    /// Runs the transfer-encoding decoder over the raw bytes and returns the decoded bytes.
+    pub fn get_decoded(&self) -> Result<Vec<u8>, MailParseError> {
+        let decode = self.decoder;
+        decode(self.body)
+    }
+
+    /// Decodes the body and converts the result into a Rust UTF-8 string.
+    ///
+    /// The charset comes from the Content-Type ("us-ascii" if it is missing or not
+    /// recognized), so the result is only meaningful when the decoded body is text.
+    pub fn get_decoded_as_string(&self) -> Result<String, MailParseError> {
+        let decoded = self.get_decoded()?;
+        get_body_as_string(&decoded, self.ctype)
+    }
+}
+
+/// Body of a message (or message subpart) carried as text, with no transfer decoding step.
+pub struct TextBody<'a> {
+    body: &'a [u8],
+    ctype: &'a ParsedContentType,
+}
+
+impl<'a> TextBody<'a> {
+    /// Returns the bytes exactly as they appear in the message (or message subpart).
+    pub fn get_raw(&self) -> &'a [u8] {
+        self.body
+    }
+
+    /// Returns the Content-Type of this body.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        self.ctype
+    }
+
+    /// Converts the body into a Rust UTF-8 string.
+    ///
+    /// The charset is taken from the Content-Type; "us-ascii" is used when the
+    /// charset is missing or not recognized.
+    pub fn get_as_string(&self) -> Result<String, MailParseError> {
+        get_body_as_string(self.body, self.ctype)
+    }
+}
+
+/// Body of a message (or message subpart) carried as raw binary data.
+pub struct BinaryBody<'a> {
+    body: &'a [u8],
+    ctype: &'a ParsedContentType,
+}
+
+impl<'a> BinaryBody<'a> {
+    /// Returns the bytes exactly as they appear in the message (or message subpart).
+    pub fn get_raw(&self) -> &'a [u8] {
+        self.body
+    }
+
+    /// Returns the Content-Type of this body.
+    pub fn get_content_type(&self) -> &'a ParsedContentType {
+        self.ctype
+    }
+}
+
+/// Decodes a base64-encoded body. ASCII whitespace (e.g. the line breaks that wrap
+/// the encoded text) is dropped first; any decoding failure is returned to the caller.
+fn decode_base64(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+    let mut stripped = Vec::with_capacity(body.len());
+    stripped.extend(body.iter().cloned().filter(|byte| !byte.is_ascii_whitespace()));
+    let decoded = base64::decode(&stripped)?;
+    Ok(decoded)
+}
+
+/// Decodes a quoted-printable body using the decoder's `Robust` parse mode.
+fn decode_quoted_printable(body: &[u8]) -> Result<Vec<u8>, MailParseError> {
+    let mode = quoted_printable::ParseMode::Robust;
+    let decoded = quoted_printable::decode(body, mode)?;
+    Ok(decoded)
+}
+
+/// Converts `body` to a `String` using the charset named in `ctype`, falling back to
+/// ASCII decoding when the label is not recognized. As written it never returns `Err`.
+fn get_body_as_string(body: &[u8], ctype: &ParsedContentType) -> Result<String, MailParseError> {
+    let text = match Charset::for_label(ctype.charset.as_bytes()) {
+        Some(charset) => charset.decode(body).0,
+        None => decode_ascii(body),
+    };
+    Ok(text.into_owned())
+}