diff options
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- | src/lib.rs | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..bdb8c85 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,373 @@ +// Copyright 2019 Alexandros Frantzis +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 + +//! The mda crate provides a library for writing custom Mail Delivery Agents. It +//! supports local delivery to maildirs, access to normalized email byte +//! data for easier processing, and access to individual header fields. +//! +//! Email data normalization involves ensuring header fields are in single +//! lines, decoding text parts of the message that use some kind of transfer +//! encoding (e.g., base64), and converting all text to UTF-8. The original +//! (non-normalized) email data is used during delivery. +//! +//! This crate also exposes convenience methods for regular expression searching +//! and processing/filtering of emails. +//! +//! # Email construction +//! +//! The [Email struct](struct.Email.html) is the basic abstraction of the `mda` +//! crate. To construct an Email use the +//! [Email::from_stdin](struct.Email.html#method.from_stdin) or +//! [Email::from_vec](struct.Email.html#method.from_vec) method. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let email = Email::from_vec(vec![97, 98, 99])?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Email delivery +//! +//! Use the +//! [Email::deliver_to_maildir](struct.Email.html#method.deliver_to_maildir) +//! method to deliver the email to local maildir directories. Note that +//! the original (non-normalized) email data is used during delivery. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! email.deliver_to_maildir("/my/maildir/path")?; +//! 
email.deliver_to_maildir("/my/other/maildir/path")?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Accessing email header fields +//! +//! Use the [Email::header_field](struct.Email.html#method.header_field) and +//! [Email::header_field_all_occurrences](struct.Email.html#method.header_field_all_occurrences) +//! methods to access the email header fields. Any MIME encoded words in the +//! header field values are decoded and the field value is converted to UTF-8. +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let to = email.header_field("To").unwrap_or(""); +//! if to.contains("me@example.com") { +//! email.deliver_to_maildir("/my/maildir/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Searching with regular expressions +//! +//! The [EmailRegex](trait.EmailRegex.html) trait provides convenience methods +//! for searching the header, the body or the whole email with regular +//! expressions. The convenience functions use case-insensitive, multi-line +//! search (`^` and `$` match beginning and end of lines). If the above don't +//! match your needs, or you require additional functionality, you can perform +//! manual regex search using the email data. +//! +//! ```no_run +//! use mda::{Email, EmailRegex}; +//! let email = Email::from_stdin()?; +//! if email.header().search(r"^To:.*me@example.com")? { +//! email.deliver_to_maildir("/my/maildir/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Processing and filtering the email with external programs +//! +//! Use the [Email::filter](struct.Email.html#method.filter) and +//! [Email::from_stdin_filtered](struct.Email.html#method.from_stdin_filtered) +//! methods to filter the email, in both cases creating a new email. +//! +//! ```no_run +//! use mda::Email; +//! // Filtering directly from stdin is more efficient. +//! let email = Email::from_stdin_filtered(&["bogofilter", "-ep"])?; +//! 
let bogosity = email.header_field("X-Bogosity").unwrap_or(""); +//! if bogosity.contains("Spam, tests=bogofilter") { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! // We can also filter at any other time. +//! let email = email.filter(&["bogofilter", "-ep"])?; +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! To perform more general processing use the +//! [Email::process](struct.Email.html#method.process) +//! method: +//! +//! ```no_run +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let output = email.process(&["bogofilter"])?; +//! if let Some(0) = output.status.code() { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Access to byte data +//! +//! Use the [Email::header](struct.Email.html#method.header), +//! [Email::body](struct.Email.html#method.body), +//! [Email::data](struct.Email.html#method.data) methods to access the +//! normalized byte data of the header, body and whole email respectively. +//! +//! Normalization involves ensuring header fields are in single lines, decoding +//! text parts of the message that use some kind of transfer encoding (e.g., +//! base64), and converting all text to UTF-8 character encoding. +//! +//! If for some reason you need access to non-normalized data use +//! [Email::raw_data](struct.Email.html#method.raw_data). +//! +//! ```no_run +//! use std::str; +//! use mda::Email; +//! let email = Email::from_stdin()?; +//! let body_str = String::from_utf8_lossy(email.body()); +//! +//! if body_str.contains("FREE BEER") { +//! email.deliver_to_maildir("/my/spam/path")?; +//! } +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` +//! +//! # Decide delivery durability vs speed trade-off +//! +//! Use the [Email::set_delivery_durability](struct.Email.html#method.set_delivery_durability) +//! to decide which [DeliveryDurability](enum.DeliveryDurability.html) method to use. +//! 
By default the most durable (but also slower) method is used. +//! +//! ```no_run +//! use mda::{Email, DeliveryDurability}; +//! let mut email = Email::from_stdin()?; +//! email.set_delivery_durability(DeliveryDurability::FileSyncOnly); +//! # Ok::<(), Box<dyn std::error::Error>>(()) +//! ``` + +mod deliver; +mod regex; +mod processing; +mod normalize; +mod decode; + +use std::io; +use std::io::prelude::*; +use std::path::{PathBuf, Path}; +use std::sync:: {Arc, Mutex, RwLock}; +use std::collections::HashMap; + +use deliver::{Maildir, EmailFilenameGenerator}; +use normalize::normalize_email; + +pub use crate::regex::EmailRegex; + +pub type Result<T> = std::result::Result<T, Box<dyn std::error::Error>>; + +fn find_empty_line(data: &[u8]) -> Option<usize> { + data.windows(2).position(|w| w[0]== b'\n' && (w[1] == b'\n' || w[1] == b'\r')) +} + +/// The method to use to try to guarantee durable email delivery. +#[derive(PartialEq, Copy, Clone)] +pub enum DeliveryDurability { + /// Perform both file and directory syncing during delivery. + /// This is the default delivery durability method. + FileAndDirSync, + /// Perform only file sync during delivery. This method is + /// potentially much faster, and is used by many existing + /// MDAs, but, depending on the used filesystem, may not + /// provide the required delivery durability guarantees. + FileSyncOnly, +} + +/// A representation of an email. +pub struct Email { + data: Vec<u8>, + normalized_data: Vec<u8>, + body_index: usize, + deliver_path: RwLock<Option<PathBuf>>, + fields: HashMap<String, Vec<String>>, + email_filename_gen: Arc<Mutex<EmailFilenameGenerator>>, + delivery_durability: DeliveryDurability, +} + +impl Email { + /// Creates an `Email` by reading data from stdin. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn from_stdin() -> Result<Self> { + let stdin = io::stdin(); + let mut data = Vec::new(); + stdin.lock().read_to_end(&mut data)?; + Email::from_vec(data) + } + + /// Creates an `Email` by using data passed in a `Vec<u8>`. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_vec(vec![1, 2, 3])?; + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn from_vec(data: Vec<u8>) -> Result<Self> { + let (normalized_data, fields) = normalize_email(&data); + let body_index = find_empty_line(&normalized_data).unwrap_or(normalized_data.len()); + let email_filename_gen = Arc::new(Mutex::new(EmailFilenameGenerator::new())); + + Ok( + Email{ + data: data, + normalized_data: normalized_data, + body_index: body_index, + deliver_path: RwLock::new(None), + fields: fields, + email_filename_gen: email_filename_gen, + delivery_durability: DeliveryDurability::FileAndDirSync, + } + ) + } + + /// Sets the durability method for delivery of this email. + /// + /// # Example + /// + /// ```no_run + /// # use mda::{DeliveryDurability, Email}; + /// let mut email = Email::from_stdin()?; + /// email.set_delivery_durability(DeliveryDurability::FileSyncOnly); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn set_delivery_durability(&mut self, delivery_durability: DeliveryDurability) { + self.delivery_durability = delivery_durability; + } + + /// Returns the value of a header field, if present. If a field occurs + /// multiple times, the value of the first occurrence is returned. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// let to = email.header_field("To").unwrap_or(""); + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn header_field(&self, name: &str) -> Option<&str> { + self.fields.get(&name.to_lowercase()).map(|v| v[0].as_str()) + } + + /// Returns the values from all occurrences of a header field, if present. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// if let Some(all_received) = email.header_field_all_occurrences("Received") { + /// // process all_received + /// } + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn header_field_all_occurrences(&self, name: &str) -> Option<&Vec<String>> { + self.fields.get(&name.to_lowercase()).map(|v| v) + } + + /// Delivers the email to the specified maildir. If the maildir isn't + /// present it is created. + /// + /// The first delivery of an email involves writing the email data to + /// the target file, whereas subsequent deliveries try to use a hard link + /// to the first delivery, falling back to a normal write if needed. + /// + /// The email is delivered durably by syncing both the file and the + /// associated directories (`DeliveryDurability::FileAndDirSync`), + /// unless a different durability method is specified with + /// `set_delivery_durability`. 
+ /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// email.deliver_to_maildir("/path/to/maildir/")?; + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn deliver_to_maildir(&self, path: impl AsRef<Path>) -> Result<PathBuf> { + self.deliver_to_maildir_path(path.as_ref()) + } + + fn deliver_to_maildir_path(&self, path: &Path) -> Result<PathBuf> { + let maildir = Maildir::open_or_create(&path, self.email_filename_gen.clone())?; + + if let Some(deliver_path) = self.deliver_path.read().unwrap().as_ref() { + let email_path_result = + maildir.deliver_with_hard_link( + deliver_path, + self.delivery_durability); + + if email_path_result.is_ok() { + return email_path_result; + } + } + + let email_path = maildir.deliver(&self.data, self.delivery_durability)?; + + *self.deliver_path.write().unwrap() = Some(email_path.clone()); + + Ok(email_path) + } + + /// Returns whether the email has been delivered to at least one maildir. + /// + /// # Example + /// + /// ```no_run + /// # use mda::Email; + /// let email = Email::from_stdin()?; + /// if !email.has_been_delivered() { + /// email.deliver_to_maildir("/fallback/maildir/")?; + /// } + /// # Ok::<(), Box<dyn std::error::Error>>(()) + /// ``` + pub fn has_been_delivered(&self) -> bool { + self.deliver_path.read().unwrap().is_some() + } + + /// Provides access to the normalized email byte data. + pub fn data(&self) -> &[u8] { + &self.normalized_data + } + + /// Provides access to the normalized email header byte data. + pub fn header(&self) -> &[u8] { + &self.normalized_data[..self.body_index] + } + + /// Provides access to the normalized email body byte data. + pub fn body(&self) -> &[u8] { + &self.normalized_data[self.body_index..] + } + + /// Provides access to the raw (non-normalized) email byte data. + pub fn raw_data(&self) -> &[u8] { + &self.data + } +} |