summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorAlexandros Frantzis <alf82@freemail.gr>2019-10-01 23:06:38 +0300
committerAlexandros Frantzis <alf82@freemail.gr>2019-10-01 23:36:26 +0300
commita054789ddb60ed1fab26e6d4e6bd36ed926273f1 (patch)
treebd3caad78a0e377816b78276889301d9276344b9 /src
Initial public release
Diffstat (limited to 'src')
-rw-r--r--src/decode.rs224
-rw-r--r--src/deliver.rs176
-rw-r--r--src/lib.rs373
-rw-r--r--src/normalize.rs477
-rw-r--r--src/processing.rs95
-rw-r--r--src/regex.rs114
6 files changed, 1459 insertions, 0 deletions
diff --git a/src/decode.rs b/src/decode.rs
new file mode 100644
index 0000000..8004f18
--- /dev/null
+++ b/src/decode.rs
@@ -0,0 +1,224 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Base64 and quoted-printable decoding.
+
+use crate::Result;
+
+const PAD: u8 = 64; // The pseudo-index of the PAD character.
+const INV: u8 = 99; // An invalid index.
+
+// Lookup table mapping every possible byte value to its base64 index:
+// 0-63 for characters of the base64 alphabet, PAD for '=' and INV for
+// every other byte. The table holds 16 rows of 16 entries (256 in total),
+// so it can be indexed with any u8 value.
+static BASE64_INDICES: &'static [u8] = &[
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
+/* 0 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 1 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 2 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, 62, INV, INV, INV, 63,
+/* 3 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, INV, INV, INV, PAD, INV, INV,
+/* 4 */ INV, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+/* 5 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, INV, INV, INV, INV, INV,
+/* 6 */ INV, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+/* 7 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, INV, INV, INV, INV, INV,
+/* 8 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 9 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* A */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* B */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* C */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* D */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* E */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* F */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+];
+
+/// Decodes base64 encoded data, appending the decoded data to a Vec<u8>.
+///
+/// Line breaks and any other bytes outside the base64 alphabet are skipped,
+/// as required by RFC 2045. The remaining characters are decoded in quanta
+/// of four characters; padding (`=`) may only appear at the end of a quantum.
+///
+/// # Errors
+///
+/// Returns an error if a quantum contains fewer than two data characters,
+/// if a data character follows padding inside the same quantum, or if the
+/// input ends in the middle of a quantum. Data decoded from earlier quanta
+/// is left intact in the output buffer; it's up to the caller to deal with
+/// the partially decoded data in case of failure.
+pub fn base64_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
+    // Characters (data and padding) collected for the current quantum.
+    let mut num_chars = 0;
+    // The 24 bits accumulated for the current quantum.
+    let mut cur_triplet: u32 = 0;
+    // Data (non-padding) characters collected for the current quantum.
+    let mut valid_chars = 0;
+
+    for &c in input {
+        let ci = BASE64_INDICES[usize::from(c)];
+        match ci {
+            // rfc2045: All line breaks or other characters not
+            // found in Table 1 must be ignored by decoding software.
+            INV => continue,
+            PAD => {}
+            _ => {
+                // The counters only differ if padding has already been seen
+                // in this quantum. Data after padding would be silently
+                // decoded into garbage, so reject it.
+                if valid_chars != num_chars {
+                    return Err("Invalid base64 encoding".into());
+                }
+                valid_chars += 1;
+            }
+        }
+
+        // Padding contributes zero bits (PAD & 0x3f == 0).
+        cur_triplet = (cur_triplet << 6) | u32::from(ci & 0x3f);
+        num_chars += 1;
+
+        if num_chars == 4 {
+            let bytes = [
+                (cur_triplet >> 16) as u8,
+                (cur_triplet >> 8) as u8,
+                cur_triplet as u8,
+            ];
+
+            // N data characters carry N - 1 complete bytes.
+            match valid_chars {
+                2 => output.push(bytes[0]),
+                3 => output.extend_from_slice(&bytes[..2]),
+                4 => output.extend_from_slice(&bytes),
+                _ => return Err("Invalid base64 encoding".into()),
+            }
+
+            cur_triplet = 0;
+            num_chars = 0;
+            valid_chars = 0;
+        }
+    }
+
+    // rfc2045: A full encoding quantum is always completed at the end of a body.
+    if num_chars != 0 {
+        return Err("Unpadded input".into());
+    }
+
+    Ok(())
+}
+
+/// Converts an ASCII byte representing a hex digit to its numerical value.
+///
+/// Both lowercase and uppercase digits are accepted. Returns `None` for any
+/// byte that is not a hex digit.
+fn hexdigit_to_num(a: u8) -> Option<u8> {
+    match a {
+        b'0'..=b'9' => Some(a - b'0'),
+        b'a'..=b'f' => Some(a - b'a' + 10),
+        b'A'..=b'F' => Some(a - b'A' + 10),
+        _ => None,
+    }
+}
+
+/// Decodes quoted-printable encoded data, appending the decoded data to a
+/// Vec<u8>.
+///
+/// Soft line breaks (a `=` followed by CRLF or LF) are dropped, and `=XY`
+/// sequences, where X and Y are hex digits, are replaced by the byte they
+/// encode. Any other `=` sequence is copied to the output as is, as are all
+/// bytes that are not part of a `=` sequence.
+///
+/// The function currently has no failure path and always returns `Ok(())`.
+pub fn qp_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
+    let mut pos = 0;
+
+    while pos < input.len() {
+        let byte = input[pos];
+        pos += 1;
+
+        // Everything outside of a '=' sequence is copied verbatim.
+        if byte != b'=' {
+            output.push(byte);
+            continue;
+        }
+
+        let first = match input.get(pos) {
+            Some(&b) => b,
+            None => {
+                // Last character in the input was an '=', just emit it.
+                output.push(b'=');
+                break;
+            }
+        };
+        pos += 1;
+        // The byte after `first` is only consumed if it completes a sequence.
+        let second = input.get(pos).copied();
+
+        let decoded = match (first, second) {
+            // A CRLF/LF after '=' marks a line continuation, and
+            // is effectively dropped.
+            (b'\r', Some(b'\n')) => {
+                pos += 1;
+                continue;
+            }
+            (b'\n', _) => continue,
+            // A valid pair of hexdigits represents the raw byte value.
+            (hi, Some(lo)) => hexdigit_to_num(hi)
+                .and_then(|hi_num| hexdigit_to_num(lo).map(|lo_num| hi_num * 16 + lo_num)),
+            _ => None,
+        };
+
+        match decoded {
+            Some(value) => {
+                output.push(value);
+                pos += 1;
+            }
+            // Emit the raw sequence if it's not one of the special
+            // cases checked above.
+            None => output.extend_from_slice(&[b'=', first]),
+        }
+    }
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test_base64 {
+    use crate::decode::base64_decode_into_buf;
+
+    // Input made only of complete quanta without padding.
+    #[test]
+    fn decodes_full_length() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJj", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abc");
+    }
+
+    // A final quantum carrying a single byte.
+    #[test]
+    fn decodes_with_two_padding() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZA==", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcd");
+    }
+
+    // A final quantum carrying two bytes.
+    #[test]
+    fn decodes_with_one_padding() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZGU=", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcde");
+    }
+
+    // Quanta with fewer than two data characters are rejected.
+    #[test]
+    fn error_with_invalid_paddings() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJj====", &mut decoded).is_err());
+        assert!(base64_decode_into_buf(b"YWJjZ===", &mut decoded).is_err());
+        assert!(base64_decode_into_buf(b"====", &mut decoded).is_err());
+    }
+
+    // Input ending in the middle of a quantum is rejected.
+    #[test]
+    fn error_with_unpadded_input() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZA=", &mut decoded).is_err());
+    }
+}
+
+#[cfg(test)]
+mod test_qp {
+    use crate::decode::qp_decode_into_buf;
+
+    // "=XY" hex escapes are replaced by the byte they encode.
+    #[test]
+    fn decodes_byte() {
+        let mut decoded = Vec::new();
+        assert!(qp_decode_into_buf(b"a=62c=64", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcd");
+    }
+
+    // Both "=\r\n" and "=\n" are soft line breaks and are dropped.
+    #[test]
+    fn decodes_soft_break() {
+        let mut decoded = Vec::new();
+        assert!(qp_decode_into_buf(b"a=\r\nb=\nc", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abc");
+    }
+
+    // Sequences that are neither escapes nor soft breaks are copied as is.
+    #[test]
+    fn invalid_sequences_are_untouched() {
+        let mut decoded = Vec::new();
+        let invalid_sequence = b"a=6t= c=";
+        assert!(qp_decode_into_buf(invalid_sequence, &mut decoded).is_ok());
+        assert_eq!(decoded, invalid_sequence);
+    }
+}
diff --git a/src/deliver.rs b/src/deliver.rs
new file mode 100644
index 0000000..8ade10f
--- /dev/null
+++ b/src/deliver.rs
@@ -0,0 +1,176 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Email delivery functionality.
+
+use std::fs::{self, File};
+use std::io::ErrorKind;
+use std::io::prelude::*;
+use std::os::unix::prelude::*;
+use std::path::{PathBuf, Path};
+use std::process;
+use std::sync::{Arc, Mutex};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use crate::{DeliveryDurability, Result};
+
+use gethostname::gethostname;
+use libc;
+
+/// A generator for likely unique maildir email filenames.
+///
+/// Using it as an iterator gets a filename that can be used in a maildir
+/// and is likely to be unique.
+pub struct EmailFilenameGenerator {
+    // Counter for the filenames handed out by the Iterator implementation.
+    count: usize,
+    // The largest unix time (in seconds) seen so far.
+    max_seen_unix_time: u64,
+    // The sanitized hostname embedded in every filename.
+    hostname: String,
+}
+
+impl EmailFilenameGenerator {
+    /// Creates a generator that uses the hostname of the current machine.
+    pub fn new() -> Self {
+        let raw_hostname = gethostname().to_string_lossy().into_owned();
+
+        // From https://cr.yp.to/proto/maildir.html:
+        // "To deal with invalid host names, replace / with \057 and : with \072"
+        let hostname = raw_hostname.replace("/", r"\057").replace(":", r"\072");
+
+        Self {
+            count: 0,
+            max_seen_unix_time: 0,
+            hostname,
+        }
+    }
+}
+
+impl Iterator for EmailFilenameGenerator {
+    type Item = String;
+
+    /// Returns the next filename candidate, in the form
+    /// `<unix_time>.<pid>_<count>.<hostname>`. Never returns `None`.
+    ///
+    /// The (time, count) pair embedded in the name never repeats for a
+    /// given generator, even if the system clock steps backwards.
+    fn next(&mut self) -> Option<String> {
+        // A clock set before the unix epoch is treated as time 0 instead of
+        // panicking; uniqueness is still provided by the counter below.
+        let unix_time = SystemTime::now()
+            .duration_since(UNIX_EPOCH)
+            .map(|elapsed| elapsed.as_secs())
+            .unwrap_or(0);
+        let pid = process::id();
+
+        if self.max_seen_unix_time < unix_time {
+            self.max_seen_unix_time = unix_time;
+            self.count = 0;
+        } else {
+            self.count += 1;
+        }
+
+        // Use the largest time seen rather than the current time: if the
+        // clock went backwards, the current time combined with the running
+        // counter could reproduce a name that was already handed out.
+        Some(format!(
+            "{}.{}_{}.{}",
+            self.max_seen_unix_time, pid, self.count, self.hostname
+        ))
+    }
+}
+
+/// A representation of a maildir.
+pub struct Maildir {
+    // The top-level directory of the maildir, containing tmp/, new/ and cur/.
+    root: PathBuf,
+    // The (possibly shared) generator of email filename candidates.
+    email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>,
+}
+
+impl Maildir {
+    /// Opens, or creates if it doesn't exist, a maildir directory structure
+    /// at the specified path.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any of the `tmp`, `new` or `cur` subdirectories
+    /// cannot be created.
+    pub fn open_or_create(
+        mailbox: &Path,
+        email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>
+    ) -> Result<Self> {
+        let root = PathBuf::from(mailbox);
+        for subdir in &["tmp", "new", "cur"] {
+            fs::create_dir_all(root.join(subdir))?;
+        }
+
+        Ok(Maildir{root, email_filename_gen})
+    }
+
+    /// Delivers an email to the maildir by creating a new file with the email data,
+    /// and using the specified DeliveryDurability method.
+    ///
+    /// The data is first written to a file in `tmp`, which is then hard-linked
+    /// into `new` under the same name and removed from `tmp`. If the name is
+    /// already taken in `new`, the whole process is retried with a new name.
+    ///
+    /// Returns the path of the delivered email in `new`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if writing, linking, removing the temporary file or
+    /// syncing the directories fails.
+    pub fn deliver(
+        &self,
+        data: &[u8],
+        delivery_durability: DeliveryDurability
+    ) -> Result<PathBuf> {
+        let tmp_dir = self.root.join("tmp");
+        let new_dir = self.root.join("new");
+
+        loop {
+            let tmp_email = self.write_email_to_dir(data, &tmp_dir)?;
+            let file_name = tmp_email
+                .file_name()
+                .ok_or("Temporary email path has no file name")?;
+            let new_email = new_dir.join(file_name);
+
+            // The temporary file is removed regardless of the link outcome.
+            let result = fs::hard_link(&tmp_email, &new_email);
+            fs::remove_file(&tmp_email)?;
+
+            match result {
+                Ok(_) => {
+                    if delivery_durability == DeliveryDurability::FileAndDirSync {
+                        File::open(&new_dir)?.sync_all()?;
+                        File::open(&tmp_dir)?.sync_all()?;
+                    }
+                    return Ok(new_email);
+                },
+                // The name is taken in new/, retry with another candidate.
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {},
+                Err(err) => return Err(err.into()),
+            }
+        }
+    }
+
+    /// Delivers an email to the maildir by hard-linking with an existing file,
+    /// and using the specified DeliveryDurability method.
+    ///
+    /// Returns the path of the delivered email in `new`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the hard link cannot be created for any reason
+    /// other than the candidate name being already taken, or if syncing
+    /// the `new` directory fails.
+    pub fn deliver_with_hard_link(
+        &self,
+        src: &Path,
+        delivery_durability: DeliveryDurability
+    ) -> Result<PathBuf> {
+        let new_dir = self.root.join("new");
+
+        loop {
+            let new_email = new_dir.join(self.next_email_filename_candidate()?);
+
+            match fs::hard_link(src, &new_email) {
+                Ok(_) => {
+                    if delivery_durability == DeliveryDurability::FileAndDirSync {
+                        File::open(&new_dir)?.sync_all()?;
+                    }
+                    return Ok(new_email);
+                },
+                // The name is taken in new/, retry with another candidate.
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {},
+                Err(err) => return Err(err.into()),
+            }
+        }
+    }
+
+    /// Writes email data to a new file in the specified directory.
+    ///
+    /// The file is created exclusively and opened with `O_SYNC`. If a
+    /// candidate name already exists, another one is tried. If writing the
+    /// data fails, the partially written file is removed (best effort)
+    /// before the error is returned.
+    fn write_email_to_dir(&self, data: &[u8], dir: &Path) -> Result<PathBuf> {
+        loop {
+            let email = dir.join(self.next_email_filename_candidate()?);
+            let result = fs::OpenOptions::new()
+                .create_new(true)
+                .write(true)
+                .custom_flags(libc::O_SYNC)
+                .open(&email);
+
+            match result {
+                Ok(mut f) => {
+                    if let Err(err) = f.write_all(data) {
+                        // Don't leave a truncated email behind. A cleanup
+                        // failure is ignored so that the original write
+                        // error is the one reported to the caller.
+                        drop(f);
+                        let _ = fs::remove_file(&email);
+                        return Err(err.into());
+                    }
+                    return Ok(email);
+                },
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {},
+                Err(err) => return Err(err.into()),
+            }
+        }
+    }
+
+    /// Gets the next email filename candidate from the EmailFilenameGenerator.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the generator lock is poisoned or the generator
+    /// doesn't produce a filename.
+    fn next_email_filename_candidate(&self) -> Result<String> {
+        let mut generator = self
+            .email_filename_gen
+            .lock()
+            .map_err(|_| "Email filename generator lock is poisoned")?;
+        generator
+            .next()
+            .ok_or_else(|| "Email filename generator produced no filename".into())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..bdb8c85
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,373 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! The mda crate provides a library for writing custom Mail Delivery Agents. It
+//! supports local delivery to maildirs, access to normalized email byte
+//! data for easier processing, and access to individual header fields.
+//!
+//! Email data normalization involves ensuring header fields are in single
+//! lines, decoding text parts of the message that use some kind of transfer
+//! encoding (e.g., base64), and converting all text to UTF-8. The original
+//! (non-normalized) email data is used during delivery.
+//!
+//! This crate also exposes convenience methods for regular expression searching
+//! and processing/filtering of emails.
+//!
+//! # Email construction
+//!
+//! The [Email struct](struct.Email.html) is the basic abstraction of the `mda`
+//! crate. To construct an Email use the
+//! [Email::from_stdin](struct.Email.html#method.from_stdin) or
+//! [Email::from_vec](struct.Email.html#method.from_vec) method.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let email = Email::from_vec(vec![97, 98, 99])?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Email delivery
+//!
+//! Use the
+//! [Email::deliver_to_maildir](struct.Email.html#method.deliver_to_maildir)
+//! method to deliver the email to local maildir directories. Note that
+//! the original (non-normalized) email data is used during delivery.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! email.deliver_to_maildir("/my/maildir/path")?;
+//! email.deliver_to_maildir("/my/other/maildir/path")?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Accessing email header fields
+//!
+//! Use the [Email::header_field](struct.Email.html#method.header_field) and
+//! [Email::header_field_all_occurrences](struct.Email.html#method.header_field_all_occurrences)
+//! methods to access the email header fields. Any MIME encoded words in the
+//! header field values are decoded and the field value is converted to UTF-8.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let to = email.header_field("To").unwrap_or("");
+//! if to.contains("me@example.com") {
+//! email.deliver_to_maildir("/my/maildir/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Searching with regular expressions
+//!
+//! The [EmailRegex](trait.EmailRegex.html) trait provides convenience methods
+//! for searching the header, the body or the whole email with regular
+//! expressions. The convenience functions use case-insensitive, multi-line
+//! search (`^` and `$` match beginning and end of lines). If the above don't
+//! match your needs, or you require additional functionality, you can perform
+//! manual regex search using the email data.
+//!
+//! ```no_run
+//! use mda::{Email, EmailRegex};
+//! let email = Email::from_stdin()?;
+//! if email.header().search(r"^To:.*me@example.com")? {
+//! email.deliver_to_maildir("/my/maildir/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Processing and filtering the email with external programs
+//!
+//! Use the [Email::filter](struct.Email.html#method.filter) and
+//! [Email::from_stdin_filtered](struct.Email.html#method.from_stdin_filtered)
+//! methods to filter the email, in both cases creating a new email.
+//!
+//! ```no_run
+//! use mda::Email;
+//! // Filtering directly from stdin is more efficient.
+//! let email = Email::from_stdin_filtered(&["bogofilter", "-ep"])?;
+//! let bogosity = email.header_field("X-Bogosity").unwrap_or("");
+//! if bogosity.contains("Spam, tests=bogofilter") {
+//! email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! // We can also filter at any other time.
+//! let email = email.filter(&["bogofilter", "-ep"])?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! To perform more general processing use the
+//! [Email::process](struct.Email.html#method.process)
+//! method:
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let output = email.process(&["bogofilter"])?;
+//! if let Some(0) = output.status.code() {
+//! email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Access to byte data
+//!
+//! Use the [Email::header](struct.Email.html#method.header),
+//! [Email::body](struct.Email.html#method.body),
+//! [Email::data](struct.Email.html#method.data) methods to access the
+//! normalized byte data of the header, body and whole email respectively.
+//!
+//! Normalization involves ensuring header fields are in single lines, decoding
+//! text parts of the message that use some kind of transfer encoding (e.g.,
+//! base64), and converting all text to UTF-8 character encoding.
+//!
+//! If for some reason you need access to non-normalized data use
+//! [Email::raw_data](struct.Email.html#method.raw_data).
+//!
+//! ```no_run
+//! use std::str;
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let body_str = String::from_utf8_lossy(email.body());
+//!
+//! if body_str.contains("FREE BEER") {
+//! email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Decide delivery durability vs speed trade-off
+//!
+//! Use the [Email::set_delivery_durability](struct.Email.html#method.set_delivery_durability)
+//! to decide which [DeliveryDurability](enum.DeliveryDurability.html) method to use.
+//! By default the most durable (but also slower) method is used.
+//!
+//! ```no_run
+//! use mda::{Email, DeliveryDurability};
+//! let mut email = Email::from_stdin()?;
+//! email.set_delivery_durability(DeliveryDurability::FileSyncOnly);
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+
+mod deliver;
+mod regex;
+mod processing;
+mod normalize;
+mod decode;
+
+use std::io;
+use std::io::prelude::*;
+use std::path::{PathBuf, Path};
+use std::sync:: {Arc, Mutex, RwLock};
+use std::collections::HashMap;
+
+use deliver::{Maildir, EmailFilenameGenerator};
+use normalize::normalize_email;
+
+pub use crate::regex::EmailRegex;
+
+pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+
+/// Returns the index of the first `\n` byte that is immediately followed
+/// by a `\n` or a `\r` byte, or `None` if there is no such byte.
+fn find_empty_line(data: &[u8]) -> Option<usize> {
+    data.iter()
+        .zip(data.iter().skip(1))
+        .position(|(&cur, &next)| cur == b'\n' && (next == b'\n' || next == b'\r'))
+}
+
+/// The method to use to try to guarantee durable email delivery.
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum DeliveryDurability {
+    /// Perform both file and directory syncing during delivery.
+    /// This is the default delivery durability method.
+    FileAndDirSync,
+    /// Perform only file sync during delivery. This method is
+    /// potentially much faster, and is used by many existing
+    /// MDAs, but, depending on the used filesystem, may not
+    /// provide the required delivery durability guarantees.
+    FileSyncOnly,
+}
+
+/// A representation of an email.
+pub struct Email {
+    // The raw (non-normalized) email data, used for delivery.
+    data: Vec<u8>,
+    // The normalized email data, used for header/body access.
+    normalized_data: Vec<u8>,
+    // The index in normalized_data separating the header from the body.
+    body_index: usize,
+    // The path this email was last written to, used as the source for
+    // hard-link deliveries.
+    deliver_path: RwLock<Option<PathBuf>>,
+    // Header field values keyed by lowercase field name.
+    fields: HashMap<String, Vec<String>>,
+    email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>,
+    delivery_durability: DeliveryDurability,
+}
+
+impl Email {
+    /// Creates an `Email` by reading data from stdin.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn from_stdin() -> Result<Self> {
+        let stdin = io::stdin();
+        let mut data = Vec::new();
+        stdin.lock().read_to_end(&mut data)?;
+        Email::from_vec(data)
+    }
+
+    /// Creates an `Email` by using data passed in a `Vec<u8>`.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_vec(vec![1, 2, 3])?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn from_vec(data: Vec<u8>) -> Result<Self> {
+        let (normalized_data, fields) = normalize_email(&data);
+        // Without an empty line the whole email is treated as the header.
+        let body_index = find_empty_line(&normalized_data).unwrap_or(normalized_data.len());
+        let email_filename_gen = Arc::new(Mutex::new(EmailFilenameGenerator::new()));
+
+        Ok(
+            Email{
+                data,
+                normalized_data,
+                body_index,
+                deliver_path: RwLock::new(None),
+                fields,
+                email_filename_gen,
+                delivery_durability: DeliveryDurability::FileAndDirSync,
+            }
+        )
+    }
+
+    /// Sets the durability method for delivery of this email.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::{DeliveryDurability, Email};
+    /// let mut email = Email::from_stdin()?;
+    /// email.set_delivery_durability(DeliveryDurability::FileSyncOnly);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn set_delivery_durability(&mut self, delivery_durability: DeliveryDurability) {
+        self.delivery_durability = delivery_durability;
+    }
+
+    /// Returns the value of a header field, if present. If a field occurs
+    /// multiple times, the value of the first occurrence is returned.
+    ///
+    /// The field name is matched case-insensitively.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// let to = email.header_field("To").unwrap_or("");
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn header_field(&self, name: &str) -> Option<&str> {
+        self.fields
+            .get(&name.to_lowercase())
+            // An entry with no values yields None rather than a panic.
+            .and_then(|values| values.first())
+            .map(String::as_str)
+    }
+
+    /// Returns the values from all occurrences of a header field, if present.
+    ///
+    /// The field name is matched case-insensitively.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// if let Some(all_received) = email.header_field_all_occurrences("Received") {
+    ///     // process all_received
+    /// }
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn header_field_all_occurrences(&self, name: &str) -> Option<&Vec<String>> {
+        self.fields.get(&name.to_lowercase())
+    }
+
+    /// Delivers the email to the specified maildir. If the maildir isn't
+    /// present it is created.
+    ///
+    /// The first delivery of an email involves writing the email data to
+    /// the target file, whereas subsequent deliveries try to use a hard link
+    /// to the first delivery, falling back to a normal write if needed.
+    ///
+    /// The email is delivered durably by syncing both the file and the
+    /// associated directories (`DeliveryDurability::FileAndDirSync`),
+    /// unless a different durability method is specified with
+    /// `set_delivery_durability`.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// email.deliver_to_maildir("/path/to/maildir/")?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn deliver_to_maildir(&self, path: impl AsRef<Path>) -> Result<PathBuf> {
+        self.deliver_to_maildir_path(path.as_ref())
+    }
+
+    // Non-generic implementation of deliver_to_maildir.
+    fn deliver_to_maildir_path(&self, path: &Path) -> Result<PathBuf> {
+        let maildir = Maildir::open_or_create(path, self.email_filename_gen.clone())?;
+
+        // If the email has been delivered before, try to hard-link to that
+        // delivery. A failure here is not fatal, we fall back to a full write.
+        if let Some(deliver_path) = self.deliver_path.read().unwrap().as_ref() {
+            let email_path_result =
+                maildir.deliver_with_hard_link(
+                    deliver_path,
+                    self.delivery_durability);
+
+            if email_path_result.is_ok() {
+                return email_path_result;
+            }
+        }
+
+        let email_path = maildir.deliver(&self.data, self.delivery_durability)?;
+
+        // Remember the written file as the source for future hard links.
+        *self.deliver_path.write().unwrap() = Some(email_path.clone());
+
+        Ok(email_path)
+    }
+
+    /// Returns whether the email has been delivered to at least one maildir.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// if !email.has_been_delivered() {
+    ///     email.deliver_to_maildir("/fallback/maildir/")?;
+    /// }
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn has_been_delivered(&self) -> bool {
+        self.deliver_path.read().unwrap().is_some()
+    }
+
+    /// Provides access to the normalized email byte data.
+    pub fn data(&self) -> &[u8] {
+        &self.normalized_data
+    }
+
+    /// Provides access to the normalized email header byte data.
+    pub fn header(&self) -> &[u8] {
+        &self.normalized_data[..self.body_index]
+    }
+
+    /// Provides access to the normalized email body byte data.
+    pub fn body(&self) -> &[u8] {
+        &self.normalized_data[self.body_index..]
+    }
+
+    /// Provides access to the raw (non-normalized) email byte data.
+    pub fn raw_data(&self) -> &[u8] {
+        &self.data
+    }
+}
diff --git a/src/normalize.rs b/src/normalize.rs
new file mode 100644
index 0000000..7c8487d
--- /dev/null
+++ b/src/normalize.rs
@@ -0,0 +1,477 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Normalization of email data for easier processing.
+//!
+//! Normalization includes:
+//!
+//! * Placing multi-line header fields on a single line
+//! * Decoding base64 or quoted-printable encoded text data, including
+//! MIME encoded-words in the header.
+//! * Converting all text data to UTF-8.
+
+use ::regex::bytes::{RegexBuilder, Regex, Captures};
+use std::collections::HashMap;
+use std::iter::Peekable;
+use memchr::{memchr, memchr_iter};
+use charset::Charset;
+use std::borrow::Cow;
+use lazy_static::lazy_static;
+
+use crate::decode::{base64_decode_into_buf, qp_decode_into_buf};
+
+/// An element recognized by the [EmailParser](struct.EmailParser.html).
+///
+/// Every variant carries the bytes of the element in its `data` field.
+enum Element {
+ // A header field.
+ HeaderField{data: Vec<u8>},
+ // Body data, along with the encoding, content type and charset
+ // associated with it, when known.
+ Body{
+ data: Vec<u8>,
+ encoding: Option<String>,
+ content_type: Option<String>,
+ charset: Option<String>
+ },
+ // NOTE(review): presumably data that is passed through unchanged --
+ // confirm against the parser implementation.
+ Verbatim{data: Vec<u8>},
+}
+
+/// Information about a part in a multi-part email message.
+/// The top-level is also considered a part.
+struct Part {
+    encoding: Option<String>,
+    content_type: Option<String>,
+    charset: Option<String>,
+    subpart_boundary: Option<Vec<u8>>,
+}
+
+impl Part {
+    /// Creates a part for which nothing is known yet: all fields are `None`.
+    fn new() -> Self {
+        Self {
+            encoding: None,
+            content_type: None,
+            charset: None,
+            subpart_boundary: None,
+        }
+    }
+}
+
+/// Iterator for the lines contained in a slice of [u8].
+///
+/// Each returned line includes its terminating `\n`, except possibly the
+/// last one if the slice doesn't end with a `\n`.
+pub struct SliceLines<'a> {
+    // The slice being iterated over.
+    buf: &'a [u8],
+    // The index in buf where the next line starts.
+    last: usize,
+}
+
+impl<'a> Iterator for SliceLines<'a> {
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<&'a [u8]> {
+        let rest = &self.buf[self.last..];
+
+        let line_len = match memchr(b'\n', rest) {
+            // The line extends up to and including the newline.
+            Some(newline) => newline + 1,
+            // Nothing left to return.
+            None if rest.is_empty() => return None,
+            // A final line without a terminating newline.
+            None => rest.len(),
+        };
+
+        self.last += line_len;
+        Some(&rest[..line_len])
+    }
+}
+
+/// A parser for the elements contained in an email.
+///
+/// The parsed elements are accessible by iterating over the parser.
+///
+/// Every line in the email is contained in a MIME part (which itself may be
+/// nested in another part). The top level of the email is also considered
+/// to be a part for convenience of processing.
+struct EmailParser<'a> {
+ // The lines of the email data, with one line of lookahead.
+ lines: Peekable<SliceLines<'a>>,
+ // The stack of nested parts the line we are processing is contained in.
+ part_stack: Vec<Part>,
+ // Whether we are currently parsing header lines.
+ in_header: bool,
+ // The active multi-part boundary.
+ active_boundary: Vec<u8>,
+ // Regular expressions used to extract information from header fields.
+ content_encoding_regex: Regex,
+ content_type_regex: Regex,
+ boundary_regex: Regex,
+}
+
+impl<'a> EmailParser<'a> {
+ fn new(buf: &'a [u8]) -> Self {
+ let content_encoding_regex =
+ RegexBuilder::new(r"Content-Transfer-Encoding:\s*([[:alnum:]-]+)")
+ .case_insensitive(true)
+ .build().unwrap();
+ let content_type_regex =
+ RegexBuilder::new(r#"^Content-Type:\s*([^;]+)\s*(?:;\s*charset\s*=\s*"?([[:alnum:]_:\-\.]+))?"?"#)
+ .case_insensitive(true)
+ .build().unwrap();
+
+ let boundary_regex =
+ RegexBuilder::new(r#"^Content-Type:\s*multipart/.*boundary\s*=\s*"?([[:alnum:]'_,/:=\(\)\+\-\.\?]+)"?"#)
+ .case_insensitive(true)
+ .build().unwrap();
+
+ EmailParser{
+ lines: SliceLines{buf, last: 0}.peekable(),
+ // All emails have the top-level part.
+ part_stack: vec![Part::new()],
+ in_header: true,
+ active_boundary: Vec::new(),
+ content_encoding_regex: content_encoding_regex,
+ content_type_regex: content_type_regex,
+ boundary_regex: boundary_regex,
+ }
+ }
+
+ // Returns the content type of the active part.
+ fn active_content_type(&self) -> Option<String> {
+ self.part_stack.last()?.content_type.clone()
+ }
+
+ // Returns the encoding of the active part.
+ fn active_encoding(&self) -> Option<String> {
+ self.part_stack.last()?.encoding.clone()
+ }
+
+ // Returns the charset of the active part.
+ fn active_charset(&self) -> Option<String> {
+ self.part_stack.last()?.charset.clone()
+ }
+
+ fn begin_part(&mut self) {
+ let part = self.part_stack.last().unwrap();
+
+ // We need to differentiate between the first and subsequent parts in a
+ // multipart message. The first part creates a new subpart in the
+ // part_stack...
+ if part.subpart_boundary.as_ref().is_some() &&
+ part.subpart_boundary.as_ref().unwrap() == &self.active_boundary {
+ self.part_stack.push(Part::new())
+ } else {
+ // ...whereas subsequent sibling parts just replace the existing
+ // part in the stack.
+ let part = self.part_stack.last_mut().unwrap();
+ *part = Part::new();
+ }
+ }
+
+ fn end_part(&mut self) {
+ self.part_stack.pop();
+ if let Some(part) = self.part_stack.last_mut() {
+ part.subpart_boundary = None;
+ }
+ for p in self.part_stack.iter().rev() {
+ if let Some(b) = &p.subpart_boundary {
+ self.active_boundary = b.clone();
+ }
+ }
+ }
+
+ fn update_active_part_from_header_field(&mut self, field: &[u8]) {
+ let mut part = self.part_stack.last_mut().unwrap();
+
+ if let Some(captures) = self.content_encoding_regex.captures(&field) {
+ let enc_bytes = captures.get(1).unwrap().as_bytes();
+ part.encoding = Some(std::str::from_utf8(&enc_bytes).unwrap().to_lowercase());
+ } else if let Some(captures) = self.boundary_regex.captures(&field) {
+ part.subpart_boundary = Some(captures.get(1).unwrap().as_bytes().to_vec());
+ self.active_boundary = part.subpart_boundary.as_ref().unwrap().clone();
+ }
+ else if let Some(captures) = self.content_type_regex.captures(&field) {
+ let type_bytes = captures.get(1).unwrap().as_bytes();