Initial public release

author: Alexandros Frantzis <alf82@freemail.gr> 2019-10-01 23:06:38 +0300
committer: Alexandros Frantzis <alf82@freemail.gr> 2019-10-01 23:36:26 +0300
commit: a054789ddb60ed1fab26e6d4e6bd36ed926273f1 (patch)
tree: bd3caad78a0e377816b78276889301d9276344b9 /src
6 files changed, 1459 insertions, 0 deletions
diff --git a/src/decode.rs b/src/decode.rs
new file mode 100644
index 0000000..8004f18
--- /dev/null
+++ b/src/decode.rs
@@ -0,0 +1,224 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Base64 and quoted-printable decoding.
+
+use crate::Result;
+
+const PAD: u8 = 64; // Pseudo-index of the '=' padding character; real indices are 0..=63.
+const INV: u8 = 99; // Marks bytes that are not part of the base64 alphabet.
+
+static BASE64_INDICES: &'static [u8] = &[
+     //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+/* 0 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 1 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 2 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,  62, INV, INV, INV,  63,
+/* 3 */  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, INV, INV, INV, PAD, INV, INV,
+/* 4 */ INV,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
+/* 5 */  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, INV, INV, INV, INV, INV,
+/* 6 */ INV,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
+/* 7 */  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, INV, INV, INV, INV, INV,
+/* 8 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* 9 */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* A */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* B */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* C */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* D */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* E */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+/* F */ INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV, INV,
+];
+
+/// Decodes base64 encoded `input`, appending the decoded bytes to `output`.
+///
+/// Bytes that are not part of the base64 alphabet, such as line breaks, are
+/// skipped. An error is returned if a group of four characters holds fewer than
+/// two data characters, or if the input ends in the middle of a group. Bytes
+/// already appended to `output` when an error occurs are left in place.
+pub fn base64_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
+    // The (up to) 24 bits gathered from the current group of four characters.
+    let mut quantum: u32 = 0;
+    // Characters consumed for the current group, padding included.
+    let mut group_len = 0;
+    // Characters of the current group that carry data, i.e. are not padding.
+    let mut data_chars = 0;
+
+    for &byte in input {
+        let index = BASE64_INDICES[byte as usize];
+        // rfc2045: All line breaks or other characters not
+        // found in Table 1 must be ignored by decoding software.
+        if index == INV {
+            continue;
+        }
+        if index < PAD {
+            data_chars += 1;
+        }
+
+        // Padding contributes zero bits, because PAD & 0x3f == 0.
+        quantum = (quantum << 6) | u32::from(index & 0x3f);
+        group_len += 1;
+        if group_len < 4 {
+            continue;
+        }
+
+        let bytes = [(quantum >> 16) as u8, (quantum >> 8) as u8, quantum as u8];
+        match data_chars {
+            2 => output.push(bytes[0]),
+            3 => output.extend(&bytes[..2]),
+            4 => output.extend(&bytes),
+            _ => return Err("Invalid base64 encoding".into()),
+        }
+
+        quantum = 0;
+        group_len = 0;
+        data_chars = 0;
+    }
+
+    // rfc2045: A full encoding quantum is always completed at the end of a body.
+    if group_len != 0 {
+        return Err("Unpadded input".into());
+    }
+
+    Ok(())
+}
+
+/// Converts an ASCII byte representing a hex digit to its numerical value.
+///
+/// Digits `0`-`9` and the letters `a`-`f` in either case are accepted; any
+/// other byte yields `None`.
+fn hexdigit_to_num(a: u8) -> Option<u8> {
+    match a.to_ascii_lowercase() {
+        // '0'..='9' map to 0..=9.
+        digit @ b'0'..=b'9' => Some(digit - b'0'),
+        // 'a'..='f' (and 'A'..='F', lowercased above) map to 10..=15.
+        letter @ b'a'..=b'f' => Some(letter - b'a' + 10),
+        // Everything else is not a hex digit.
+        _ => None,
+    }
+}
+
+/// Decodes quoted-printable encoded data, appending the decoded data to a
+/// Vec<u8>.
+///
+/// Soft line breaks ('=' followed by CRLF or LF) are dropped, "=XX" hex pairs
+/// are replaced by the byte they encode, and any other sequence starting with
+/// '=' is copied to the output unchanged. All other bytes are copied as they
+/// are. The current implementation has no failure path and always returns Ok.
+pub fn qp_decode_into_buf(input: &[u8], output: &mut Vec<u8>) -> Result<()> {
+    let mut iter = input.iter().peekable();
+
+    'outer: loop {
+        loop {
+            match iter.next() {
+                Some(b'=') => break,
+                Some(c) => output.push(*c),
+                None => break 'outer,
+            }
+        }
+
+        // At this point we have encountered a '=', so check
+        // to see what follows.
+        if let Some(&first) = iter.next() {
+            // A CRLF/LF after '=' marks a line continuation, and
+            // is effectively dropped.
+            if first == b'\r' {
+                if iter.peek() == Some(&&b'\n') {
+                    iter.next();
+                    continue;
+                }
+            } else if first == b'\n' {
+                continue;
+            } else if let Some(first_num) = hexdigit_to_num(first) {
+                // A valid pair of hexdigits represent the raw byte value.
+                if let Some(&&second) = iter.peek() {
+                    if let Some(second_num) = hexdigit_to_num(second) {
+                        output.push(first_num * 16 + second_num);
+                        iter.next();
+                        continue;
+                    }
+                }
+            }
+
+            // Emit the raw sequence if it's not one of the
+            // special cases checked above.
+            output.extend(&[b'=', first]);
+        } else {
+            // Last character in the input was an '=', just emit it.
+            output.push(b'=');
+        }
+    }
+
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod test_base64 {
+    use super::base64_decode_into_buf;
+
+    #[test]
+    fn decodes_full_length() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJj", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abc");
+    }
+
+    #[test]
+    fn decodes_with_two_padding() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZA==", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcd");
+    }
+
+    #[test]
+    fn decodes_with_one_padding() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZGU=", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcde");
+    }
+
+    #[test]
+    fn error_with_invalid_paddings() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJj====", &mut decoded).is_err());
+        assert!(base64_decode_into_buf(b"YWJjZ===", &mut decoded).is_err());
+        assert!(base64_decode_into_buf(b"====", &mut decoded).is_err());
+    }
+
+    #[test]
+    fn error_with_unpadded_input() {
+        let mut decoded = Vec::new();
+        assert!(base64_decode_into_buf(b"YWJjZA=", &mut decoded).is_err());
+    }
+}
+
+#[cfg(test)]
+mod test_qp {
+    use super::qp_decode_into_buf;
+
+    #[test]
+    fn decodes_byte() {
+        let mut decoded = Vec::new();
+        assert!(qp_decode_into_buf(b"a=62c=64", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abcd");
+    }
+
+    #[test]
+    fn decodes_soft_break() {
+        let mut decoded = Vec::new();
+        assert!(qp_decode_into_buf(b"a=\r\nb=\nc", &mut decoded).is_ok());
+        assert_eq!(decoded, b"abc");
+    }
+
+    #[test]
+    fn invalid_sequences_are_untouched() {
+        let mut decoded = Vec::new();
+        let invalid_sequence = b"a=6t= c=";
+        assert!(qp_decode_into_buf(invalid_sequence, &mut decoded).is_ok());
+        assert_eq!(decoded, invalid_sequence);
+    }
+}
diff --git a/src/deliver.rs b/src/deliver.rs
new file mode 100644
index 0000000..8ade10f
--- /dev/null
+++ b/src/deliver.rs
@@ -0,0 +1,176 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Email delivery functionality.
+
+use std::fs::{self, File};
+use std::io::ErrorKind;
+use std::io::prelude::*;
+use std::os::unix::prelude::*;
+use std::path::{PathBuf, Path};
+use std::process;
+use std::sync::{Arc, Mutex};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use crate::{DeliveryDurability, Result};
+
+use gethostname::gethostname;
+use libc;
+
+/// A generator of maildir email filenames that are likely to be unique.
+///
+/// Each item it yields when used as an iterator is a filename made up of a
+/// unix timestamp, the process id, a counter and the hostname.
+pub struct EmailFilenameGenerator {
+    count: usize,
+    max_seen_unix_time: u64,
+    hostname: String,
+}
+
+impl EmailFilenameGenerator {
+    /// Creates a generator that embeds the local hostname in the filenames
+    /// it produces.
+    pub fn new() -> Self {
+        let raw_hostname = gethostname();
+
+        // From https://cr.yp.to/proto/maildir.html:
+        // "To deal with invalid host names, replace / with \057 and : with \072"
+        let hostname = raw_hostname
+            .to_string_lossy()
+            .replace('/', r"\057")
+            .replace(':', r"\072");
+
+        let (count, max_seen_unix_time) = (0, 0);
+
+        Self { count, max_seen_unix_time, hostname }
+    }
+}
+
+impl Iterator for EmailFilenameGenerator {
+    type Item = String;
+
+    /// Returns a filename of the form `<unix time>.<pid>_<count>.<hostname>`.
+    fn next(&mut self) -> Option<String> {
+        // A clock set before the epoch reads as time 0 instead of panicking.
+        let now = SystemTime::now().duration_since(UNIX_EPOCH).map(|d| d.as_secs()).unwrap_or(0);
+        // Use the newest time seen, so a clock jumping backwards can't repeat a name.
+        if self.max_seen_unix_time < now {
+            self.max_seen_unix_time = now;
+            self.count = 0;
+        } else {
+            self.count += 1;
+        }
+        Some(format!("{}.{}_{}.{}", self.max_seen_unix_time, process::id(), self.count, self.hostname))
+    }
+}
+
+/// A maildir directory structure located at `root`.
+pub struct Maildir {
+    root: PathBuf,
+    email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>,
+}
+
+impl Maildir {
+    /// Opens a maildir directory structure at the specified path, creating
+    /// any of the `tmp`, `new` and `cur` subdirectories that don't exist yet.
+    pub fn open_or_create(
+        mailbox: &Path,
+        email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>
+    ) -> Result<Self> {
+        let root = mailbox.to_path_buf();
+
+        ["tmp", "new", "cur"]
+            .iter()
+            .try_for_each(|subdir| fs::create_dir_all(root.join(subdir)))?;
+
+        Ok(Maildir { root, email_filename_gen })
+    }
+
+    /// Delivers an email to the maildir by creating a new file with the email data,
+    /// and using the specified DeliveryDurability method.
+    pub fn deliver(
+        &self,
+        data: &[u8],
+        delivery_durability: DeliveryDurability
+    ) -> Result<PathBuf> {
+        let tmp_dir = self.root.join("tmp");
+        let new_dir = self.root.join("new");
+
+        loop {
+            let tmp_email = self.write_email_to_dir(data, &tmp_dir)?;
+            let filename = tmp_email.file_name().ok_or("Temporary email path has no filename")?;
+            let new_email = new_dir.join(filename);
+
+            let result = fs::hard_link(&tmp_email, &new_email);
+            fs::remove_file(&tmp_email)?;
+            match result {
+                Ok(_) => {
+                    if delivery_durability == DeliveryDurability::FileAndDirSync {
+                        File::open(&new_dir)?.sync_all()?;
+                        File::open(&tmp_dir)?.sync_all()?;
+                    }
+                    return Ok(new_email);
+                },
+                // The name is already taken in `new`: start over with a fresh one.
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => {},
+                Err(err)  => return Err(err.into()),
+            }
+        }
+    }
+
+    /// Delivers an email to the maildir by hard-linking `src` into `new`,
+    /// and using the specified DeliveryDurability method.
+    pub fn deliver_with_hard_link(
+        &self,
+        src: &Path,
+        delivery_durability: DeliveryDurability
+    ) -> Result<PathBuf> {
+        let new_dir = self.root.join("new");
+        let sync_dir = delivery_durability == DeliveryDurability::FileAndDirSync;
+
+        loop {
+            let new_email = new_dir.join(self.next_email_filename_candidate()?);
+
+            match fs::hard_link(src, &new_email) {
+                Ok(()) if sync_dir => File::open(&new_dir)?.sync_all()?,
+                Ok(()) => {},
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => continue,
+                Err(err) => return Err(err.into()),
+            }
+            // Only reached once the link has been created.
+            return Ok(new_email);
+        }
+    }
+
+    /// Writes email data to a new file in the specified directory.
+    fn write_email_to_dir(&self, data: &[u8], dir: &Path) -> Result<PathBuf> {
+        // The file must not exist yet, and is opened with O_SYNC.
+        let mut options = fs::OpenOptions::new();
+        options.create_new(true).write(true).custom_flags(libc::O_SYNC);
+
+        loop {
+            let email = dir.join(self.next_email_filename_candidate()?);
+
+            let mut file = match options.open(&email) {
+                Ok(file) => file,
+                // The candidate name is taken: try again with the next one.
+                Err(ref err) if err.kind() == ErrorKind::AlreadyExists => continue,
+                Err(err) => return Err(err.into()),
+            };
+
+            file.write_all(data)?;
+            return Ok(email);
+        }
+    }
+
+    /// Gets the next email filename candidate from the EmailFilenameGenerator.
+    fn next_email_filename_candidate(&self) -> Result<String> {
+        let mut names = self.email_filename_gen.lock().map_err(|_| "Email filename generator lock is poisoned")?;
+        names.next().ok_or_else(|| "Email filename generator produced no filename".into())
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..bdb8c85
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,373 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! The mda crate provides a library for writing custom Mail Delivery Agents. It
+//! supports local delivery to maildirs, access to normalized email byte
+//! data for easier processing, and access to individual header fields.
+//!
+//! Email data normalization involves ensuring header fields are in single
+//! lines, decoding text parts of the message that use some kind of transfer
+//! encoding (e.g., base64), and converting all text to UTF-8.  The original
+//! (non-normalized) email data is used during delivery.
+//!
+//! This crate also exposes convenience methods for regular expression searching
+//! and processing/filtering of emails.
+//!
+//! # Email construction
+//!
+//! The [Email struct](struct.Email.html) is the basic abstraction of the `mda`
+//! crate. To construct an Email use the
+//! [Email::from_stdin](struct.Email.html#method.from_stdin) or
+//! [Email::from_vec](struct.Email.html#method.from_vec) method.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let email = Email::from_vec(vec![97, 98, 99])?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Email delivery
+//!
+//! Use the
+//! [Email::deliver_to_maildir](struct.Email.html#method.deliver_to_maildir)
+//! method to deliver the email to local maildir directories. Note that
+//! the original (non-normalized) email data is used during delivery.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! email.deliver_to_maildir("/my/maildir/path")?;
+//! email.deliver_to_maildir("/my/other/maildir/path")?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Accessing email header fields
+//!
+//! Use the [Email::header_field](struct.Email.html#method.header_field) and
+//! [Email::header_field_all_occurrences](struct.Email.html#method.header_field_all_occurrences)
+//! methods to access the email header fields. Any MIME encoded words in the
+//! header field values are decoded and the field value is converted to UTF-8.
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let to = email.header_field("To").unwrap_or("");
+//! if to.contains("me@example.com") {
+//!     email.deliver_to_maildir("/my/maildir/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Searching with regular expressions
+//!
+//! The [EmailRegex](trait.EmailRegex.html) trait provides convenience methods
+//! for searching the header, the body or the whole email with regular
+//! expressions. The convenience functions use case-insensitive, multi-line
+//! search (`^` and `$` match beginning and end of lines).  If the above don't
+//! match your needs, or you require additional functionality, you can perform
+//! manual regex search using the email data.
+//!
+//! ```no_run
+//! use mda::{Email, EmailRegex};
+//! let email = Email::from_stdin()?;
+//! if email.header().search(r"^To:.*me@example.com")? {
+//!     email.deliver_to_maildir("/my/maildir/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Processing and filtering the email with external programs
+//!
+//! Use the [Email::filter](struct.Email.html#method.filter) and
+//! [Email::from_stdin_filtered](struct.Email.html#method.from_stdin_filtered)
+//! methods to filter the email, in both cases creating a new email.
+//!
+//! ```no_run
+//! use mda::Email;
+//! // Filtering directly from stdin is more efficient.
+//! let email = Email::from_stdin_filtered(&["bogofilter", "-ep"])?;
+//! let bogosity = email.header_field("X-Bogosity").unwrap_or("");
+//! if bogosity.contains("Spam, tests=bogofilter") {
+//!     email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! // We can also filter at any other time.
+//! let email = email.filter(&["bogofilter", "-ep"])?;
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! To perform more general processing use the
+//! [Email::process](struct.Email.html#method.process)
+//! method:
+//!
+//! ```no_run
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let output = email.process(&["bogofilter"])?;
+//! if let Some(0) = output.status.code() {
+//!     email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Access to byte data
+//!
+//! Use the [Email::header](struct.Email.html#method.header),
+//! [Email::body](struct.Email.html#method.body),
+//! [Email::data](struct.Email.html#method.data) methods to access the
+//! normalized byte data of the header, body and whole email respectively.
+//!
+//! Normalization involves ensuring header fields are in single lines, decoding
+//! text parts of the message that use some kind of transfer encoding (e.g.,
+//! base64), and converting all text to UTF-8 character encoding.
+//!
+//! If for some reason you need access to non-normalized data use
+//! [Email::raw_data](struct.Email.html#method.raw_data).
+//!
+//! ```no_run
+//! use std::str;
+//! use mda::Email;
+//! let email = Email::from_stdin()?;
+//! let body_str = String::from_utf8_lossy(email.body());
+//!
+//! if body_str.contains("FREE BEER") {
+//!     email.deliver_to_maildir("/my/spam/path")?;
+//! }
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+//!
+//! # Decide delivery durability vs speed trade-off
+//!
+//! Use the [Email::set_delivery_durability](struct.Email.html#method.set_delivery_durability)
+//! to decide which [DeliveryDurability](enum.DeliveryDurability.html) method to use.
+//! By default the most durable (but also slower) method is used.
+//!
+//! ```no_run
+//! use mda::{Email, DeliveryDurability};
+//! let mut email = Email::from_stdin()?;
+//! email.set_delivery_durability(DeliveryDurability::FileSyncOnly);
+//! # Ok::<(), Box<dyn std::error::Error>>(())
+//! ```
+
+mod deliver;
+mod regex;
+mod processing;
+mod normalize;
+mod decode;
+
+use std::io;
+use std::io::prelude::*;
+use std::path::{PathBuf, Path};
+use std::sync:: {Arc, Mutex, RwLock};
+use std::collections::HashMap;
+
+use deliver::{Maildir, EmailFilenameGenerator};
+use normalize::normalize_email;
+
+pub use crate::regex::EmailRegex;
+
+pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>;
+
+fn find_empty_line(data: &[u8]) -> Option<usize> {
+    data.windows(2).position(|pair| pair[0] == b'\n' && [b'\n', b'\r'].contains(&pair[1]))
+}
+
+/// The method to use to try to guarantee durable email delivery.
+#[derive(Debug, PartialEq, Eq, Copy, Clone)]
+pub enum DeliveryDurability {
+    /// Perform both file and directory syncing during delivery.
+    /// This is the default delivery durability method.
+    FileAndDirSync,
+    /// Perform only file sync during delivery. This method is
+    /// potentially much faster, and is used by many existing
+    /// MDAs, but, depending on the used filesystem, may not
+    /// provide the required delivery durability guarantees.
+    FileSyncOnly,
+}
+
+/// An email, holding both its raw data and a normalized version of it.
+pub struct Email {
+    data: Vec<u8>,
+    normalized_data: Vec<u8>,
+    body_index: usize,
+    deliver_path: RwLock<Option<PathBuf>>,
+    fields: HashMap<String, Vec<String>>,
+    email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>,
+    delivery_durability: DeliveryDurability,
+}
+
+impl Email {
+    /// Creates an `Email` from all the data that can be read from stdin.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn from_stdin() -> Result<Self> {
+        let mut raw_data = Vec::new();
+        io::stdin().lock().read_to_end(&mut raw_data)?;
+
+        Self::from_vec(raw_data)
+    }
+
+    /// Creates an `Email` from the raw email data held in a `Vec<u8>`.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_vec(vec![1, 2, 3])?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn from_vec(data: Vec<u8>) -> Result<Self> {
+        let (normalized_data, fields) = normalize_email(&data);
+
+        // Without an empty line separating header from body, all of the
+        // normalized data is considered to be header.
+        let body_index = find_empty_line(&normalized_data).unwrap_or_else(|| normalized_data.len());
+
+        Ok(Email {
+            data,
+            normalized_data,
+            body_index,
+            deliver_path: RwLock::new(None),
+            fields,
+            email_filename_gen: Arc::new(Mutex::new(EmailFilenameGenerator::new())),
+            delivery_durability: DeliveryDurability::FileAndDirSync,
+        })
+    }
+
+    /// Sets the durability method used by subsequent deliveries of this email.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::{DeliveryDurability, Email};
+    /// let mut email = Email::from_stdin()?;
+    /// email.set_delivery_durability(DeliveryDurability::FileSyncOnly);
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn set_delivery_durability(&mut self, delivery_durability: DeliveryDurability) {
+        self.delivery_durability = delivery_durability;
+    }
+
+    /// Returns the value of a header field, or `None` if it is not present. If
+    /// a field occurs multiple times, the value of the first occurrence is returned.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// let to = email.header_field("To").unwrap_or("");
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn header_field(&self, name: &str) -> Option<&str> {
+        self.fields.get(&name.to_lowercase()).and_then(|values| values.first()).map(String::as_str)
+    }
+
+    /// Returns the values from all occurrences of a header field, if present.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// if let Some(all_received) = email.header_field_all_occurrences("Received") {
+    ///     // process all_received
+    /// }
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn header_field_all_occurrences(&self, name: &str) -> Option<&Vec<String>> {
+        self.fields.get(&name.to_lowercase())
+    }
+
+    /// Delivers the email to the specified maildir, creating the maildir if it
+    /// isn't present. Returns the path of the delivered email file.
+    ///
+    /// The first delivery of an email involves writing the email data to
+    /// the target file, whereas subsequent deliveries try to use a hard link
+    /// to an earlier delivery, falling back to a normal write if needed.
+    ///
+    /// The email is delivered durably by syncing both the file and the
+    /// associated directories (`DeliveryDurability::FileAndDirSync`),
+    /// unless a different durability method is specified with
+    /// `set_delivery_durability`.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// email.deliver_to_maildir("/path/to/maildir/")?;
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn deliver_to_maildir(&self, path: impl AsRef<Path>) -> Result<PathBuf> {
+        self.deliver_to_maildir_path(path.as_ref())
+    }
+
+    fn deliver_to_maildir_path(&self, path: &Path) -> Result<PathBuf> {
+        let maildir = Maildir::open_or_create(path, Arc::clone(&self.email_filename_gen))?;
+
+        // If the email has been written out before, try to hard-link to that
+        // file. A failed attempt is not an error, we just write a new file.
+        if let Some(previous) = self.deliver_path.read().unwrap().as_ref() {
+            let linked = maildir.deliver_with_hard_link(previous, self.delivery_durability);
+            if linked.is_ok() {
+                return linked;
+            }
+        }
+
+        // Write the email data to a new file, and remember where it is so
+        // that later deliveries can link to it.
+        let email_path = maildir.deliver(&self.data, self.delivery_durability)?;
+
+        *self.deliver_path.write().unwrap() = Some(email_path.clone());
+
+        Ok(email_path)
+    }
+
+    /// Returns whether the email has been successfully delivered to at least one maildir.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use mda::Email;
+    /// let email = Email::from_stdin()?;
+    /// if !email.has_been_delivered() {
+    ///     email.deliver_to_maildir("/fallback/maildir/")?;
+    /// }
+    /// # Ok::<(), Box<dyn std::error::Error>>(())
+    /// ```
+    pub fn has_been_delivered(&self) -> bool {
+        self.deliver_path.read().unwrap().is_some()
+    }
+
+    /// Provides access to the normalized byte data of the whole email.
+    pub fn data(&self) -> &[u8] {
+        &self.normalized_data
+    }
+
+    /// Provides access to the normalized email header byte data (all data if no empty line exists).
+    pub fn header(&self) -> &[u8] {
+        &self.normalized_data[..self.body_index]
+    }
+
+    /// Provides access to the normalized email body byte data (empty if no empty line exists).
+    pub fn body(&self) -> &[u8] {
+        &self.normalized_data[self.body_index..]
+    }
+
+    /// Provides access to the raw (non-normalized) byte data the email was created from.
+    pub fn raw_data(&self) -> &[u8] {
+        &self.data
+    }
+}
diff --git a/src/normalize.rs b/src/normalize.rs
new file mode 100644
index 0000000..7c8487d
--- /dev/null
+++ b/src/normalize.rs
@@ -0,0 +1,477 @@
+// Copyright 2019 Alexandros Frantzis
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+//
+// SPDX-License-Identifier: MPL-2.0
+
+//! Normalization of email data for easier processing.
+//!
+//! Normalization includes:
+//!
+//! * Placing multi-line header fields on a single line
+//! * Decoding base64 or quoted-printable encoded text data, including
+//!   MIME encoded-words in the header.
+//! * Converting all text data to UTF-8.
+
+use ::regex::bytes::{RegexBuilder, Regex, Captures};
+use std::collections::HashMap;
+use std::iter::Peekable;
+use memchr::{memchr, memchr_iter};
+use charset::Charset;
+use std::borrow::Cow;
+use lazy_static::lazy_static;
+
+use crate::decode::{base64_decode_into_buf, qp_decode_into_buf};
+
+/// An element (header field, body or verbatim data) recognized by the [EmailParser](struct.EmailParser.html).
+enum Element {
+    HeaderField{data: Vec<u8>},
+    Body{
+        data: Vec<u8>,
+        encoding: Option<String>,
+        content_type: Option<String>,
+        charset: Option<String>
+    },
+    Verbatim{data: Vec<u8>},
+}
+
+/// Information about a part in a multi-part email message.
+/// The top level of the email is also considered to be a part.
+struct Part {
+    encoding: Option<String>,
+    content_type: Option<String>,
+    charset: Option<String>,
+    subpart_boundary: Option<Vec<u8>>,
+}
+
+impl Part {
+    /// Creates a part with no known encoding, content type, charset or
+    /// subpart boundary.
+    fn new() -> Self {
+        let (encoding, content_type, charset) = (None, None, None);
+        let subpart_boundary = None;
+
+        Self { encoding, content_type, charset, subpart_boundary }
+    }
+}
+
+/// Iterator over the lines of a `[u8]` slice; `last` is the index where the next line starts.
+pub struct SliceLines<'a> {
+    buf: &'a [u8],
+    last: usize,
+}
+
+impl<'a> Iterator for SliceLines<'a> {
+    type Item = &'a [u8];
+
+    /// Returns the next line of the buffer, including its terminating '\n'
+    /// when there is one, or `None` once the whole buffer has been consumed.
+    fn next(&mut self) -> Option<&'a [u8]> {
+        let rest = &self.buf[self.last..];
+        if rest.is_empty() {
+            return None;
+        }
+
+        // A last line that lacks a line break extends to the end of the buffer.
+        let len = match memchr(b'\n', rest) {
+            Some(newline) => newline + 1,
+            None => rest.len(),
+        };
+
+        self.last += len;
+
+        Some(&rest[..len])
+    }
+}
+
+/// A parser for the elements contained in an email.
+///
+/// The parsed elements are accessible by iterating over the parser.
+///
+/// Every line in the email is contained in a MIME part (which itself may be
+/// nested in another part). The top level of the email is also considered
+/// to be a part for convenience of processing.
+struct EmailParser<'a> {
+    lines: Peekable<SliceLines<'a>>,
+    // The stack of nested parts the line we are processing is contained in.
+    part_stack: Vec<Part>,
+    // Whether we are currently parsing header lines.
+    in_header: bool,
+    // The active multi-part boundary.
+    active_boundary: Vec<u8>,
+    content_encoding_regex: Regex,
+    content_type_regex: Regex,
+    boundary_regex: Regex,
+}
+
+impl<'a> EmailParser<'a> {
+    fn new(buf: &'a [u8]) -> Self {
+        // Every pattern is anchored with '^', so it can only match at the start of a field.
+        let content_encoding_regex =
+            RegexBuilder::new(r"^Content-Transfer-Encoding:\s*([[:alnum:]-]+)")
+                .case_insensitive(true)
+                .build().unwrap();
+        let content_type_regex =
+            RegexBuilder::new(r#"^Content-Type:\s*([^;]+)\s*(?:;\s*charset\s*=\s*"?([[:alnum:]_:\-\.]+))?"?"#)
+                .case_insensitive(true)
+                .build().unwrap();
+        let boundary_regex =
+            RegexBuilder::new(r#"^Content-Type:\s*multipart/.*boundary\s*=\s*"?([[:alnum:]'_,/:=\(\)\+\-\.\?]+)"?"#)
+                .case_insensitive(true)
+                .build().unwrap();
+
+        EmailParser{
+            lines: SliceLines{buf, last: 0}.peekable(),
+            // All emails have the top-level part.
+            part_stack: vec![Part::new()],
+            in_header: true,
+            active_boundary: Vec::new(),
+            content_encoding_regex,
+            content_type_regex,
+            boundary_regex,
+        }
+    }
+
+    // Returns a copy of the active part's content type, if there is one.
+    fn active_content_type(&self) -> Option<String> {
+        self.part_stack.last().and_then(|part| part.content_type.clone())
+    }
+
+    // Returns a copy of the active part's transfer encoding, if there is one.
+    fn active_encoding(&self) -> Option<String> {
+        self.part_stack.last().and_then(|part| part.encoding.clone())
+    }
+
+    // Returns a copy of the active part's charset, if there is one.
+    fn active_charset(&self) -> Option<String> {
+        self.part_stack.last().and_then(|part| part.charset.clone())
+    }
+
+    fn begin_part(&mut self) {
+        // We need to differentiate between the first and subsequent parts in a
+        // multipart message. The first part creates a new subpart in the
+        // part_stack (as does an empty stack, rather than panicking)...
+        let is_first_subpart = self.part_stack.last().map_or(true, |part| {
+            part.subpart_boundary.as_ref() == Some(&self.active_boundary)
+        });
+
+        if is_first_subpart {
+            self.part_stack.push(Part::new());
+        } else if let Some(part) = self.part_stack.last_mut() {
+            // ...whereas subsequent sibling parts just replace the existing
+            // part in the stack.
+            *part = Part::new();
+        }
+    }
+
+    fn end_part(&mut self) {
+        self.part_stack.pop();
+        if let Some(part) = self.part_stack.last_mut() {
+            part.subpart_boundary = None;
+        }
+        // The boundary of the innermost part that still has one becomes active.
+        let enclosing = self.part_stack.iter().rev().find_map(|p| p.subpart_boundary.as_ref());
+        if let Some(boundary) = enclosing {
+            self.active_boundary = boundary.clone();
+        }
+    }
+
+    fn update_active_part_from_header_field(&mut self, field: &[u8]) {
+        let mut part = self.part_stack.last_mut().unwrap();
+
+        if let Some(captures) = self.content_encoding_regex.captures(&field) {
+            let enc_bytes = captures.get(1).unwrap().as_bytes();
+            part.encoding = Some(std::str::from_utf8(&enc_bytes).unwrap().to_lowercase());
+        } else if let Some(captures) = self.boundary_regex.captures(&field) {
+            part.subpart_boundary = Some(captures.get(1).unwrap().as_bytes().to_vec());
+            self.active_boundary = part.subpart_boundary.as_ref().unwrap().clone();
+        }
+        else if let Some(captures) = self.content_type_regex.captures(&field) {
+            let type_bytes = captures.get(1).unwrap().as_bytes();
author	Alexandros Frantzis <alf82@freemail.gr>	2019-10-01 23:06:38 +0300
committer	Alexandros Frantzis <alf82@freemail.gr>	2019-10-01 23:36:26 +0300
commit	a054789ddb60ed1fab26e6d4e6bd36ed926273f1 (patch)
tree	bd3caad78a0e377816b78276889301d9276344b9 /src